Merge tag 'lsk-v4.4-16.07-android'
author Huang, Tao <huangtao@rock-chips.com>
Wed, 10 Aug 2016 07:15:47 +0000 (15:15 +0800)
committer Huang, Tao <huangtao@rock-chips.com>
Wed, 10 Aug 2016 07:15:47 +0000 (15:15 +0800)
LSK 16.07 v4.4-android

* tag 'lsk-v4.4-16.07-android': (160 commits)
  arm64: kaslr: increase randomization granularity
  arm64: relocatable: deal with physically misaligned kernel images
  arm64: don't map TEXT_OFFSET bytes below the kernel if we can avoid it
  arm64: kernel: replace early 64-bit literal loads with move-immediates
  arm64: introduce mov_q macro to move a constant into a 64-bit register
  arm64: kernel: perform relocation processing from ID map
  arm64: kernel: use literal for relocated address of __secondary_switched
  arm64: kernel: don't export local symbols from head.S
  arm64: simplify kernel segment mapping granularity
  arm64: cover the .head.text section in the .text segment mapping
  arm64: move early boot code to the .init segment
  arm64: use 'segment' rather than 'chunk' to describe mapped kernel regions
  arm64: mm: Mark .rodata as RO
  Linux 4.4.16
  ovl: verify upper dentry before unlink and rename
  drm/i915: Revert DisplayPort fast link training feature
  tmpfs: fix regression hang in fallocate undo
  tmpfs: don't undo fallocate past its last page
  crypto: qat - make qat_asym_algs.o depend on asn1 headers
  xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7
  ...

Makefile
arch/arm/mach-mvebu/coherency.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/mgag200/mgag200_mode.c
kernel/sched/fair.c

diff --combined Makefile
index 584eb44f8e65b5dba33140003df6d190a2eb9d31,da7621cadc8e5cca47e3eb2caff9b23e275083d6..bf85ba0fb248214768f2ee984e9e998c449fb7aa
+++ b/Makefile
@@@ -1,6 -1,6 +1,6 @@@
  VERSION = 4
  PATCHLEVEL = 4
- SUBLEVEL = 15
+ SUBLEVEL = 16
  EXTRAVERSION =
  NAME = Blurry Fish Butt
  
@@@ -248,13 -248,7 +248,13 @@@ SUBARCH := $(shell uname -m | sed -e s/
  # "make" in the configured kernel build directory always uses that.
  # Default value for CROSS_COMPILE is not to prefix executables
  # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
 +ARCH          ?= arm64
  ARCH          ?= $(SUBARCH)
 +ifeq ($(ARCH),arm64)
 +ifneq ($(wildcard $(srctree)/../prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9),)
 +CROSS_COMPILE ?= $(srctree)/../prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin/aarch64-linux-android-
 +endif
 +endif
  CROSS_COMPILE ?= $(CONFIG_CROSS_COMPILE:"%"=%)
  
  # Architecture as present in compile.h
@@@ -363,12 -357,6 +363,12 @@@ PERL             = per
  PYTHON                = python
  CHECK         = sparse
  
 +# Use the wrapper for the compiler. This wrapper scans for new
 +# warnings and causes the build to stop upon encountering them.
 +ifneq ($(wildcard $(srctree)/scripts/gcc-wrapper.py),)
 +CC            = $(srctree)/scripts/gcc-wrapper.py $(CROSS_COMPILE)gcc
 +endif
 +
  CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
                  -Wbitwise -Wno-return-void $(CF)
  CFLAGS_MODULE   =
@@@ -787,11 -775,6 +787,11 @@@ KBUILD_ARFLAGS := $(call ar-option,D
  ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
        KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
        KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 +else ifneq ($(findstring aarch64-linux-android, $(CROSS_COMPILE)),)
 +# It seems that the Android gcc can't pass the gcc-goto.sh check, but asm
 +# goto does work, so activate it here.
 +      KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
 +      KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
  endif
  
  include scripts/Makefile.kasan
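
For context, -DCC_HAVE_ASM_GOTO is what lets the jump-label/static-key code
use the compiler's "asm goto" extension. As an illustrative aside (not part
of this merge), a stand-alone probe in the spirit of the scripts/gcc-goto.sh
check referenced above looks roughly like this; the point is only that the
toolchain has to accept the syntax:

    /* Hedged sketch: an "asm goto" with an empty template and a label list.
     * If a compiler builds this, forcing -DCC_HAVE_ASM_GOTO is plausible.
     */
    static int probe_asm_goto(void)
    {
            /* Empty template; no jump ever happens at run time. */
            asm goto("" : : : : supported);
            return 0;
    supported:
            return 1;
    }

    int main(void)
    {
            (void)probe_asm_goto();
            return 0;
    }
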
index 1ee9e3901a8ff871adba957de8f99c95d931de5b,feed36b32ff68474ba4bd84b3c8abcc7bf2318a0..a27be620440170bf0e5ae627884ec73dc18ef57f
@@@ -98,7 -98,7 +98,7 @@@ static int mvebu_hwcc_notifier(struct n
  
        if (event != BUS_NOTIFY_ADD_DEVICE)
                return NOTIFY_DONE;
 -      set_dma_ops(dev, &arm_coherent_dma_ops);
 +      arch_set_dma_ops(dev, &arm_coherent_dma_ops);
  
        return NOTIFY_OK;
  }
@@@ -162,22 -162,16 +162,16 @@@ exit
  }
  
  /*
-  * This ioremap hook is used on Armada 375/38x to ensure that PCIe
-  * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
-  * is needed as a workaround for a deadlock issue between the PCIe
-  * interface and the cache controller.
+  * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
+  * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
+  * needed for the HW I/O coherency mechanism to work properly without
+  * deadlock.
   */
  static void __iomem *
- armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
-                             unsigned int mtype, void *caller)
+ armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+                        unsigned int mtype, void *caller)
  {
-       struct resource pcie_mem;
-       mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
-       if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
-               mtype = MT_UNCACHED;
+       mtype = MT_UNCACHED;
        return __arm_ioremap_caller(phys_addr, size, mtype, caller);
  }
  
@@@ -186,7 -180,7 +180,7 @@@ static void __init armada_375_380_coher
        struct device_node *cache_dn;
  
        coherency_cpu_base = of_iomap(np, 0);
-       arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
+       arch_ioremap_caller = armada_wa_ioremap_caller;
  
        /*
         * We should switch the PL310 to I/O coherency mode only if
index 1ffe9c329c46df9a7753291861368b67a3764325,59d1269626b15f17843ee3b4351c62de07173b3a..4dc90479568b24cc06b3724d3c47777460a4af94
@@@ -335,6 -335,8 +335,8 @@@ atmel_hlcdc_plane_update_pos_and_size(s
  
                atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
                                             factor_reg);
+       } else {
+               atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
        }
  }
  
@@@ -941,7 -943,7 +943,7 @@@ atmel_hlcdc_plane_create(struct drm_dev
        ret = drm_universal_plane_init(dev, &plane->base, 0,
                                       &layer_plane_funcs,
                                       desc->formats->formats,
 -                                     desc->formats->nformats, type);
 +                                     desc->formats->nformats, type, NULL);
        if (ret)
                return ERR_PTR(ret);
  
index a4f9571caa3e77d6f471ca792d9e31bb7cf34387,6253775b8d9cc08b235cd880c56a465f98654545..8f9d344b2997c592b754d55a6e8284b1618a8f33
@@@ -367,6 -367,8 +367,8 @@@ int drm_atomic_set_mode_prop_for_crtc(s
                drm_property_unreference_blob(state->mode_blob);
        state->mode_blob = NULL;
  
+       memset(&state->mode, 0, sizeof(state->mode));
        if (blob) {
                if (blob->length != sizeof(struct drm_mode_modeinfo) ||
                    drm_mode_convert_umode(&state->mode,
                DRM_DEBUG_ATOMIC("Set [MODE:%s] for CRTC state %p\n",
                                 state->mode.name, state);
        } else {
-               memset(&state->mode, 0, sizeof(state->mode));
                state->enable = false;
                DRM_DEBUG_ATOMIC("Set [NOMODE] for CRTC state %p\n",
                                 state);
@@@ -1191,7 -1192,12 +1192,7 @@@ void drm_atomic_legacy_backoff(struct d
  retry:
        drm_modeset_backoff(state->acquire_ctx);
  
 -      ret = drm_modeset_lock(&state->dev->mode_config.connection_mutex,
 -                             state->acquire_ctx);
 -      if (ret)
 -              goto retry;
 -      ret = drm_modeset_lock_all_crtcs(state->dev,
 -                                       state->acquire_ctx);
 +      ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
        if (ret)
                goto retry;
  }
index 35af30cedb83237a14187d737716954a264790bb,dc84003f694e1bb12d6e72568f15707a76a5b1c7..4900d2f76e2494a37fc0940290c55493ae4cfb44
@@@ -657,7 -657,6 +657,7 @@@ DEFINE_WW_CLASS(crtc_ww_class)
   * @primary: Primary plane for CRTC
   * @cursor: Cursor plane for CRTC
   * @funcs: callbacks for the new CRTC
 + * @name: printf style format string for the CRTC name, or NULL for default name
   *
   * Inits a new object created as base part of a driver crtc object.
   *
  int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
                              struct drm_plane *primary,
                              struct drm_plane *cursor,
 -                            const struct drm_crtc_funcs *funcs)
 +                            const struct drm_crtc_funcs *funcs,
 +                            const char *name, ...)
  {
        struct drm_mode_config *config = &dev->mode_config;
        int ret;
@@@ -1077,7 -1075,6 +1077,7 @@@ EXPORT_SYMBOL(drm_connector_unplug_all)
   * @encoder: the encoder to init
   * @funcs: callbacks for this encoder
   * @encoder_type: user visible type of the encoder
 + * @name: printf style format string for the encoder name, or NULL for default name
   *
   * Initialises a preallocated encoder. Encoder should be
   * subclassed as part of driver encoder objects.
  int drm_encoder_init(struct drm_device *dev,
                      struct drm_encoder *encoder,
                      const struct drm_encoder_funcs *funcs,
 -                    int encoder_type)
 +                    int encoder_type, const char *name, ...)
  {
        int ret;
  
@@@ -1153,7 -1150,6 +1153,7 @@@ EXPORT_SYMBOL(drm_encoder_cleanup)
   * @formats: array of supported formats (%DRM_FORMAT_*)
   * @format_count: number of elements in @formats
   * @type: type of plane (overlay, primary, cursor)
 + * @name: printf style format string for the plane name, or NULL for default name
   *
   * Initializes a plane object of type @type.
   *
@@@ -1164,8 -1160,7 +1164,8 @@@ int drm_universal_plane_init(struct drm
                             unsigned long possible_crtcs,
                             const struct drm_plane_funcs *funcs,
                             const uint32_t *formats, unsigned int format_count,
 -                           enum drm_plane_type type)
 +                           enum drm_plane_type type,
 +                           const char *name, ...)
  {
        struct drm_mode_config *config = &dev->mode_config;
        int ret;
@@@ -1245,100 -1240,10 +1245,100 @@@ int drm_plane_init(struct drm_device *d
  
        type = is_primary ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY;
        return drm_universal_plane_init(dev, plane, possible_crtcs, funcs,
 -                                      formats, format_count, type);
 +                                      formats, format_count, type, NULL);
  }
  EXPORT_SYMBOL(drm_plane_init);
  
 +/**
 + * drm_share_plane_init - Initialize a share plane
 + * @dev: DRM device
 + * @plane: plane object to init
 + * @parent: this plane share some resources with parent plane.
 + * @possible_crtcs: bitmask of possible CRTCs
 + * @funcs: callbacks for the new plane
 + * @formats: array of supported formats (%DRM_FORMAT_*)
 + * @format_count: number of elements in @formats
 + * @type: type of plane (overlay, primary, cursor)
 + *
 + * With this API, the plane can share hardware resources with other planes.
 + *
 + *   --------------------------------------------------
 + *   |  scanout                                       |
 + *   |         ------------------                     |
 + *   |         |  parent plane  |                     |
 + *   |         | active scanout |                     |
 + *   |         |                |   ----------------- |
 + *   |         ------------------   | share plane 1 | |
 + *   |  -----------------           |active scanout | |
 + *   |  | share plane 0 |           |               | |
 + *   |  |active scanout |           ----------------- |
 + *   |  |               |                             |
 + *   |  -----------------                             |
 + *   --------------------------------------------------
 + *
 + *    parent plane
 + *        |---share plane 0
 + *        |---share plane 1
 + *        ...
 + *
 + * The plane hardware is only busy while the display scanout passes through
 + * the plane's active scanout area, which means the hardware resources can be
 + * reused by another plane outside that area.
 + *
 + * Because resources are shared, share planes have some limits: one group of
 + * share planes must use the same zpos, must not overlap, etc.
 + *
 + * A share plane is assumed to be a universal plane with some limit flags.
 + * Users of share planes need to know these limits: they should enable the
 + * DRM_CLIENT_CAP_SHARE_PLANES client cap and check the plane limits before use.
 + *
 + * Returns:
 + * Zero on success, error code on failure.
 + */
 +
 +int drm_share_plane_init(struct drm_device *dev, struct drm_plane *plane,
 +                       struct drm_plane *parent,
 +                       unsigned long possible_crtcs,
 +                       const struct drm_plane_funcs *funcs,
 +                       const uint32_t *formats, unsigned int format_count,
 +                       enum drm_plane_type type)
 +{
 +      struct drm_mode_config *config = &dev->mode_config;
 +      int ret;
 +      int share_id;
 +
 +      /*
 +       * TODO: only verified with atomic DRM drivers.
 +       */
 +      if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
 +              return -EINVAL;
 +
 +      ret = drm_universal_plane_init(dev, plane, possible_crtcs, funcs,
 +                                     formats, format_count, type, NULL);
 +      if (ret)
 +              return ret;
 +
 +      if (parent) {
 +              /*
 +               * Can't support more than two levels of plane sharing.
 +               */
 +              WARN_ON(parent->parent);
 +              share_id = parent->base.id;
 +              plane->parent = parent;
 +
 +              config->num_share_plane++;
 +              if (plane->type == DRM_PLANE_TYPE_OVERLAY)
 +                      config->num_share_overlay_plane++;
 +      } else {
 +              share_id = plane->base.id;
 +      }
 +
 +      drm_object_attach_property(&plane->base,
 +                                 config->prop_share_id, share_id);
 +      return 0;
 +}
 +EXPORT_SYMBOL(drm_share_plane_init);
 +
  /**
   * drm_plane_cleanup - Clean up the core plane usage
   * @plane: plane to cleanup
@@@ -1361,11 -1266,6 +1361,11 @@@ void drm_plane_cleanup(struct drm_plan
        dev->mode_config.num_total_plane--;
        if (plane->type == DRM_PLANE_TYPE_OVERLAY)
                dev->mode_config.num_overlay_plane--;
 +      if (plane->parent) {
 +              dev->mode_config.num_share_plane--;
 +              if (plane->type == DRM_PLANE_TYPE_OVERLAY)
 +                      dev->mode_config.num_share_overlay_plane--;
 +      }
        drm_modeset_unlock_all(dev);
  
        WARN_ON(plane->state && !plane->funcs->atomic_destroy_state);
@@@ -1497,18 -1397,6 +1497,18 @@@ static int drm_mode_create_standard_pro
                return -ENOMEM;
        dev->mode_config.plane_type_property = prop;
  
 +      prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE,
 +                                       "SHARE_ID", 0, UINT_MAX);
 +      if (!prop)
 +              return -ENOMEM;
 +
 +      dev->mode_config.prop_share_id = prop;
 +      prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE,
 +                                       "SHARE_FLAGS", 0, UINT_MAX);
 +      if (!prop)
 +              return -ENOMEM;
 +      dev->mode_config.prop_share_flags = prop;
 +
        prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC,
                        "SRC_X", 0, UINT_MAX);
        if (!prop)
@@@ -2315,12 -2203,6 +2315,12 @@@ int drm_mode_getplane_res(struct drm_de
                num_planes = config->num_total_plane;
        else
                num_planes = config->num_overlay_plane;
 +      if (!file_priv->share_planes) {
 +              if (file_priv->universal_planes)
 +                      num_planes -= config->num_share_plane;
 +              else
 +                      num_planes -= config->num_share_overlay_plane;
 +      }
  
        /*
         * This ioctl is called twice, once to determine how much space is
                        if (plane->type != DRM_PLANE_TYPE_OVERLAY &&
                            !file_priv->universal_planes)
                                continue;
 +                      if (plane->parent && !file_priv->share_planes)
 +                              continue;
  
                        if (put_user(plane->base.id, plane_ptr + copied))
                                return -EFAULT;
@@@ -2802,8 -2682,6 +2802,6 @@@ int drm_mode_setcrtc(struct drm_device 
                        goto out;
                }
  
-               drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
                /*
                 * Check whether the primary plane supports the fb pixel format.
                 * Drivers not implementing the universal planes API use a
@@@ -5153,20 -5031,6 +5151,20 @@@ int drm_mode_connector_attach_encoder(s
  {
        int i;
  
 +      /*
 +       * In the past, drivers have attempted to model the static association
 +       * of connector to encoder in simple connector/encoder devices using a
 +       * direct assignment of connector->encoder = encoder. This connection
 +       * is a logical one and the responsibility of the core, so drivers are
 +       * expected not to mess with this.
 +       *
 +       * Note that the error return should've been enough here, but a large
 +       * majority of drivers ignores the return value, so add in a big WARN
 +       * to get people's attention.
 +       */
 +      if (WARN_ON(connector->encoder))
 +              return -EINVAL;
 +
        for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
                if (connector->encoder_ids[i] == 0) {
                        connector->encoder_ids[i] = encoder->base.id;
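
To make the drm_share_plane_init() kernel-doc above concrete, here is one
plausible driver-side sketch (illustrative only, not part of this merge).
The foo_* names are placeholders, the share-plane declarations are assumed
to come from the headers this patch set extends, and userspace would still
need to enable the DRM_CLIENT_CAP_SHARE_PLANES client cap mentioned in the
kernel-doc in order to see the extra planes:

    #include <linux/kernel.h>
    #include <drm/drm_crtc.h>
    #include <drm/drm_fourcc.h>
    #include <drm/drm_atomic_helper.h>

    /* Placeholder plane funcs; a real driver would fill these in properly. */
    static const struct drm_plane_funcs foo_plane_funcs = {
            .update_plane           = drm_atomic_helper_update_plane,
            .disable_plane          = drm_atomic_helper_disable_plane,
            .destroy                = drm_plane_cleanup,
            .reset                  = drm_atomic_helper_plane_reset,
            .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
            .atomic_destroy_state   = drm_atomic_helper_plane_destroy_state,
    };

    static const uint32_t foo_formats[] = { DRM_FORMAT_XRGB8888 };

    static int foo_create_planes(struct drm_device *dev,
                                 struct drm_plane *parent,
                                 struct drm_plane *child)
    {
            int ret;

            /* Parent plane owns the hardware, hence no parent argument. */
            ret = drm_share_plane_init(dev, parent, NULL, 0x1,
                                       &foo_plane_funcs, foo_formats,
                                       ARRAY_SIZE(foo_formats),
                                       DRM_PLANE_TYPE_OVERLAY);
            if (ret)
                    return ret;

            /* Child reuses the parent's hardware outside its active scanout. */
            return drm_share_plane_init(dev, child, parent, 0x1,
                                        &foo_plane_funcs, foo_formats,
                                        ARRAY_SIZE(foo_formats),
                                        DRM_PLANE_TYPE_OVERLAY);
    }
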
index f408eac04856338c578925af2e7f660edf60c34e,c41bc42b6fa707c95400f00430f29fba403dce1a..240392ce305d2e68849b409712af801080518a4b
@@@ -8228,12 -8228,14 +8228,14 @@@ static void ironlake_init_pch_refclk(st
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_encoder *encoder;
+       int i;
        u32 val, final;
        bool has_lvds = false;
        bool has_cpu_edp = false;
        bool has_panel = false;
        bool has_ck505 = false;
        bool can_ssc = false;
+       bool using_ssc_source = false;
  
        /* We need to take the global config into account */
        for_each_intel_encoder(dev, encoder) {
                can_ssc = true;
        }
  
-       DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d\n",
-                     has_panel, has_lvds, has_ck505);
+       /* Check if any DPLLs are using the SSC source */
+       for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+               u32 temp = I915_READ(PCH_DPLL(i));
+               if (!(temp & DPLL_VCO_ENABLE))
+                       continue;
+               if ((temp & PLL_REF_INPUT_MASK) ==
+                   PLLB_REF_INPUT_SPREADSPECTRUMIN) {
+                       using_ssc_source = true;
+                       break;
+               }
+       }
+       DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d using_ssc_source %d\n",
+                     has_panel, has_lvds, has_ck505, using_ssc_source);
  
        /* Ironlake: try to setup display ref clock before DPLL
         * enabling. This is only under driver's control after
                                final |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
                } else
                        final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
-       } else {
-               final |= DREF_SSC_SOURCE_DISABLE;
-               final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+       } else if (using_ssc_source) {
+               final |= DREF_SSC_SOURCE_ENABLE;
+               final |= DREF_SSC1_ENABLE;
        }
  
        if (final == val)
                POSTING_READ(PCH_DREF_CONTROL);
                udelay(200);
        } else {
-               DRM_DEBUG_KMS("Disabling SSC entirely\n");
+               DRM_DEBUG_KMS("Disabling CPU source output\n");
  
                val &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
  
                POSTING_READ(PCH_DREF_CONTROL);
                udelay(200);
  
-               /* Turn off the SSC source */
-               val &= ~DREF_SSC_SOURCE_MASK;
-               val |= DREF_SSC_SOURCE_DISABLE;
+               if (!using_ssc_source) {
+                       DRM_DEBUG_KMS("Disabling SSC source\n");
  
-               /* Turn off SSC1 */
-               val &= ~DREF_SSC1_ENABLE;
+                       /* Turn off the SSC source */
+                       val &= ~DREF_SSC_SOURCE_MASK;
+                       val |= DREF_SSC_SOURCE_DISABLE;
  
-               I915_WRITE(PCH_DREF_CONTROL, val);
-               POSTING_READ(PCH_DREF_CONTROL);
-               udelay(200);
+                       /* Turn off SSC1 */
+                       val &= ~DREF_SSC1_ENABLE;
+                       I915_WRITE(PCH_DREF_CONTROL, val);
+                       POSTING_READ(PCH_DREF_CONTROL);
+                       udelay(200);
+               }
        }
  
        BUG_ON(val != final);
@@@ -9669,6 -9689,8 +9689,8 @@@ static void broadwell_set_cdclk(struct 
        sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data);
        mutex_unlock(&dev_priv->rps.hw_lock);
  
+       I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
        intel_update_cdclk(dev);
  
        WARN(cdclk != dev_priv->cdclk_freq,
@@@ -13707,7 -13729,7 +13729,7 @@@ static struct drm_plane *intel_primary_
        drm_universal_plane_init(dev, &primary->base, 0,
                                 &intel_plane_funcs,
                                 intel_primary_formats, num_formats,
 -                               DRM_PLANE_TYPE_PRIMARY);
 +                               DRM_PLANE_TYPE_PRIMARY, NULL);
  
        if (INTEL_INFO(dev)->gen >= 4)
                intel_create_rotation_property(dev, primary);
@@@ -13859,7 -13881,7 +13881,7 @@@ static struct drm_plane *intel_cursor_p
                                 &intel_plane_funcs,
                                 intel_cursor_formats,
                                 ARRAY_SIZE(intel_cursor_formats),
 -                               DRM_PLANE_TYPE_CURSOR);
 +                               DRM_PLANE_TYPE_CURSOR, NULL);
  
        if (INTEL_INFO(dev)->gen >= 4) {
                if (!dev->mode_config.rotation_property)
@@@ -13936,7 -13958,7 +13958,7 @@@ static void intel_crtc_init(struct drm_
                goto fail;
  
        ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, primary,
 -                                      cursor, &intel_crtc_funcs);
 +                                      cursor, &intel_crtc_funcs, NULL);
        if (ret)
                goto fail;
  
index 883da2d3be0c887644231c75f558accc11203872,8e1d6d74c203e1d91669d92c8a45fe35c352f9d2..0ff09fe97a462733423b868152fd78281e2ed9e8
@@@ -3628,8 -3628,7 +3628,7 @@@ static boo
  intel_dp_reset_link_train(struct intel_dp *intel_dp, uint32_t *DP,
                        uint8_t dp_train_pat)
  {
-       if (!intel_dp->train_set_valid)
-               memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+       memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
        intel_dp_set_signal_levels(intel_dp, DP);
        return intel_dp_set_link_train(intel_dp, DP, dp_train_pat);
  }
@@@ -3746,22 -3745,6 +3745,6 @@@ intel_dp_link_training_clock_recovery(s
                        break;
                }
  
-               /*
-                * if we used previously trained voltage and pre-emphasis values
-                * and we don't get clock recovery, reset link training values
-                */
-               if (intel_dp->train_set_valid) {
-                       DRM_DEBUG_KMS("clock recovery not ok, reset");
-                       /* clear the flag as we are not reusing train set */
-                       intel_dp->train_set_valid = false;
-                       if (!intel_dp_reset_link_train(intel_dp, &DP,
-                                                      DP_TRAINING_PATTERN_1 |
-                                                      DP_LINK_SCRAMBLING_DISABLE)) {
-                               DRM_ERROR("failed to enable link training\n");
-                               return;
-                       }
-                       continue;
-               }
  
                /* Check to see if we've tried the max voltage */
                for (i = 0; i < intel_dp->lane_count; i++)
@@@ -3854,7 -3837,6 +3837,6 @@@ intel_dp_link_training_channel_equaliza
                /* Make sure clock is still ok */
                if (!drm_dp_clock_recovery_ok(link_status,
                                              intel_dp->lane_count)) {
-                       intel_dp->train_set_valid = false;
                        intel_dp_link_training_clock_recovery(intel_dp);
                        intel_dp_set_link_train(intel_dp, &DP,
                                                training_pattern |
  
                /* Try 5 times, then try clock recovery if that fails */
                if (tries > 5) {
-                       intel_dp->train_set_valid = false;
                        intel_dp_link_training_clock_recovery(intel_dp);
                        intel_dp_set_link_train(intel_dp, &DP,
                                                training_pattern |
  
        intel_dp->DP = DP;
  
-       if (channel_eq) {
-               intel_dp->train_set_valid = true;
+       if (channel_eq)
                DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
-       }
  }
  
  void intel_dp_stop_link_train(struct intel_dp *intel_dp)
@@@ -5079,13 -5058,15 +5058,15 @@@ static void intel_edp_panel_vdd_sanitiz
  
  void intel_dp_encoder_reset(struct drm_encoder *encoder)
  {
-       struct intel_dp *intel_dp;
+       struct drm_i915_private *dev_priv = to_i915(encoder->dev);
+       struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+       if (!HAS_DDI(dev_priv))
+               intel_dp->DP = I915_READ(intel_dp->output_reg);
  
        if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP)
                return;
  
-       intel_dp = enc_to_intel_dp(encoder);
        pps_lock(intel_dp);
  
        /*
@@@ -5157,9 -5138,6 +5138,6 @@@ intel_dp_hpd_pulse(struct intel_digital
        intel_display_power_get(dev_priv, power_domain);
  
        if (long_hpd) {
-               /* indicate that we need to restart link training */
-               intel_dp->train_set_valid = false;
                if (!intel_digital_port_connected(dev_priv, intel_dig_port))
                        goto mst_fail;
  
@@@ -6156,7 -6134,7 +6134,7 @@@ intel_dp_init(struct drm_device *dev, i
        encoder = &intel_encoder->base;
  
        drm_encoder_init(dev, &intel_encoder->base, &intel_dp_enc_funcs,
 -                       DRM_MODE_ENCODER_TMDS);
 +                       DRM_MODE_ENCODER_TMDS, NULL);
  
        intel_encoder->compute_config = intel_dp_compute_config;
        intel_encoder->disable = intel_disable_dp;
index 31802128dfbb59e8fec3e1bf4264df16968dc17b,e5bb40e58020931af0f15257a4d2b9fe755f9182..267f7a1bf12e749a605b7a94aa047e7a8b29468c
@@@ -194,7 -194,7 +194,7 @@@ static int mga_g200se_set_plls(struct m
                        }
                }
  
-               fvv = pllreffreq * testn / testm;
+               fvv = pllreffreq * (n + 1) / (m + 1);
                fvv = (fvv - 800000) / 50000;
  
                if (fvv > 15)
        WREG_DAC(MGA1064_PIX_PLLC_M, m);
        WREG_DAC(MGA1064_PIX_PLLC_N, n);
        WREG_DAC(MGA1064_PIX_PLLC_P, p);
+       if (mdev->unique_rev_id >= 0x04) {
+               WREG_DAC(0x1a, 0x09);
+               msleep(20);
+               WREG_DAC(0x1a, 0x01);
+       }
        return 0;
  }
  
@@@ -1538,7 -1546,7 +1546,7 @@@ static struct drm_encoder *mga_encoder_
        encoder->possible_crtcs = 0x1;
  
        drm_encoder_init(dev, encoder, &mga_encoder_encoder_funcs,
 -                       DRM_MODE_ENCODER_DAC);
 +                       DRM_MODE_ENCODER_DAC, NULL);
        drm_encoder_helper_add(encoder, &mga_encoder_helper_funcs);
  
        return encoder;
diff --combined kernel/sched/fair.c
index 67629fa15d58e44b72c428afdb37cf31024381d5,b46edb7447bb5e87794fa693bcd7fa91195c9760..c51be77f93930c5558d9d50237cdb78f2540b0c6
@@@ -34,7 -34,6 +34,7 @@@
  #include <trace/events/sched.h>
  
  #include "sched.h"
 +#include "tune.h"
  
  /*
   * Targeted preemption latency for CPU-bound tasks:
@@@ -2587,7 -2586,6 +2587,7 @@@ __update_load_avg(u64 now, int cpu, str
  
        scale_freq = arch_scale_freq_capacity(NULL, cpu);
        scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
 +      trace_sched_contrib_scale_f(cpu, scale_freq, scale_cpu);
  
        /* delta_w is the amount already accumulated against our next period */
        delta_w = sa->period_contrib;
@@@ -2684,6 -2682,23 +2684,23 @@@ static inline void update_tg_load_avg(s
  
  static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
  
+ /*
+  * Unsigned subtract and clamp on underflow.
+  *
+  * Explicitly do a load-store to ensure the intermediate value never hits
+  * memory. This allows lockless observations without ever seeing the negative
+  * values.
+  */
+ #define sub_positive(_ptr, _val) do {                         \
+       typeof(_ptr) ptr = (_ptr);                              \
+       typeof(*ptr) val = (_val);                              \
+       typeof(*ptr) res, var = READ_ONCE(*ptr);                \
+       res = var - val;                                        \
+       if (res > var)                                          \
+               res = 0;                                        \
+       WRITE_ONCE(*ptr, res);                                  \
+ } while (0)
  /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
  static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  {
  
        if (atomic_long_read(&cfs_rq->removed_load_avg)) {
                s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-               sa->load_avg = max_t(long, sa->load_avg - r, 0);
-               sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+               sub_positive(&sa->load_avg, r);
+               sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
                removed = 1;
        }
  
        if (atomic_long_read(&cfs_rq->removed_util_avg)) {
                long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-               sa->util_avg = max_t(long, sa->util_avg - r, 0);
-               sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+               sub_positive(&sa->util_avg, r);
+               sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
        }
  
        decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
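
A stand-alone illustration of the sub_positive() helper added above
(userspace sketch, not kernel code: typeof, READ_ONCE and WRITE_ONCE are
dropped for brevity; the point of the load-store form in the kernel macro is
that a lockless reader never observes the wrapped-around intermediate value):

    #include <stdio.h>

    /* Subtract and clamp at zero, detecting unsigned underflow via res > var. */
    static void sub_positive_demo(unsigned long *ptr, unsigned long val)
    {
            unsigned long var = *ptr;
            unsigned long res = var - val;

            if (res > var)  /* wrapped around, i.e. would have gone negative */
                    res = 0;
            *ptr = res;
    }

    int main(void)
    {
            unsigned long load_avg = 100;

            sub_positive_demo(&load_avg, 30);     /* 100 - 30 -> 70 */
            sub_positive_demo(&load_avg, 500);    /* 70 - 500 -> clamped to 0 */
            printf("load_avg = %lu\n", load_avg); /* prints 0 */
            return 0;
    }
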
@@@ -2731,10 -2746,6 +2748,10 @@@ static inline void update_load_avg(stru
  
        if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
                update_tg_load_avg(cfs_rq, 0);
 +
 +      if (entity_is_task(se))
 +              trace_sched_load_avg_task(task_of(se), &se->avg);
 +      trace_sched_load_avg_cpu(cpu, cfs_rq);
  }
  
  static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@@ -2770,10 -2781,10 +2787,10 @@@ static void detach_entity_load_avg(stru
                          &se->avg, se->on_rq * scale_load_down(se->load.weight),
                          cfs_rq->curr == se, NULL);
  
-       cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-       cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-       cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-       cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+       sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+       sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+       sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
+       sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
  }
  
  /* Add the load generated by se into cfs_rq's load average */
@@@ -2815,45 -2826,27 +2832,45 @@@ dequeue_entity_load_avg(struct cfs_rq *
                max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
  }
  
 -/*
 - * Task first catches up with cfs_rq, and then subtract
 - * itself from the cfs_rq (task must be off the queue now).
 - */
 -void remove_entity_load_avg(struct sched_entity *se)
 -{
 -      struct cfs_rq *cfs_rq = cfs_rq_of(se);
 -      u64 last_update_time;
 -
  #ifndef CONFIG_64BIT
 +static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
 +{
        u64 last_update_time_copy;
 +      u64 last_update_time;
  
        do {
                last_update_time_copy = cfs_rq->load_last_update_time_copy;
                smp_rmb();
                last_update_time = cfs_rq->avg.last_update_time;
        } while (last_update_time != last_update_time_copy);
 +
 +      return last_update_time;
 +}
  #else
 -      last_update_time = cfs_rq->avg.last_update_time;
 +static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
 +{
 +      return cfs_rq->avg.last_update_time;
 +}
  #endif
  
 +/*
 + * Task first catches up with cfs_rq, and then subtract
 + * itself from the cfs_rq (task must be off the queue now).
 + */
 +void remove_entity_load_avg(struct sched_entity *se)
 +{
 +      struct cfs_rq *cfs_rq = cfs_rq_of(se);
 +      u64 last_update_time;
 +
 +      /*
 +       * Newly created task or never used group entity should not be removed
 +       * from its (source) cfs_rq
 +       */
 +      if (se->avg.last_update_time == 0)
 +              return;
 +
 +      last_update_time = cfs_rq_last_update_time(cfs_rq);
 +
        __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
        atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
        atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
@@@ -4151,23 -4144,6 +4168,23 @@@ static inline void hrtick_update(struc
  }
  #endif
  
 +static inline unsigned long boosted_cpu_util(int cpu);
 +
 +static void update_capacity_of(int cpu)
 +{
 +      unsigned long req_cap;
 +
 +      if (!sched_freq())
 +              return;
 +
 +      /* Convert scale-invariant capacity to cpu. */
 +      req_cap = boosted_cpu_util(cpu);
 +      req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
 +      set_cfs_cpu_capacity(cpu, true, req_cap);
 +}
 +
 +static bool cpu_overutilized(int cpu);
 +
  /*
   * The enqueue_task method is called before nr_running is
   * increased. Here we update the fair scheduling stats and
@@@ -4178,8 -4154,6 +4195,8 @@@ enqueue_task_fair(struct rq *rq, struc
  {
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
 +      int task_new = flags & ENQUEUE_WAKEUP_NEW;
 +      int task_wakeup = flags & ENQUEUE_WAKEUP;
  
        for_each_sched_entity(se) {
                if (se->on_rq)
                update_cfs_shares(cfs_rq);
        }
  
 -      if (!se)
 +      if (!se) {
                add_nr_running(rq, 1);
 +              if (!task_new && !rq->rd->overutilized &&
 +                  cpu_overutilized(rq->cpu))
 +                      rq->rd->overutilized = true;
  
 +              schedtune_enqueue_task(p, cpu_of(rq));
 +
 +              /*
 +               * We want to potentially trigger a freq switch
 +               * request only for tasks that are waking up; this is
 +               * because we get here also during load balancing, but
 +               * in these cases it seems wise to trigger a single
 +               * request after load balancing is done.
 +               */
 +              if (task_new || task_wakeup)
 +                      update_capacity_of(cpu_of(rq));
 +      }
        hrtick_update(rq);
  }
  
@@@ -4286,25 -4245,9 +4303,25 @@@ static void dequeue_task_fair(struct r
                update_cfs_shares(cfs_rq);
        }
  
 -      if (!se)
 +      if (!se) {
                sub_nr_running(rq, 1);
 +              schedtune_dequeue_task(p, cpu_of(rq));
  
 +              /*
 +               * We want to potentially trigger a freq switch
 +               * request only for tasks that are going to sleep;
 +               * this is because we get here also during load
 +               * balancing, but in these cases it seems wise to
 +               * trigger a single request after load balancing is
 +               * done.
 +               */
 +              if (task_sleep) {
 +                      if (rq->cfs.nr_running)
 +                              update_capacity_of(cpu_of(rq));
 +                      else if (sched_freq())
 +                              set_cfs_cpu_capacity(cpu_of(rq), false, 0);
 +              }
 +      }
        hrtick_update(rq);
  }
  
@@@ -4531,6 -4474,15 +4548,6 @@@ static unsigned long target_load(int cp
        return max(rq->cpu_load[type-1], total);
  }
  
 -static unsigned long capacity_of(int cpu)
 -{
 -      return cpu_rq(cpu)->cpu_capacity;
 -}
 -
 -static unsigned long capacity_orig_of(int cpu)
 -{
 -      return cpu_rq(cpu)->cpu_capacity_orig;
 -}
  
  static unsigned long cpu_avg_load_per_task(int cpu)
  {
@@@ -4699,357 -4651,6 +4716,357 @@@ static long effective_load(struct task_
  
  #endif
  
 +/*
 + * Returns the current capacity of cpu after applying both
 + * cpu and freq scaling.
 + */
 +unsigned long capacity_curr_of(int cpu)
 +{
 +      return cpu_rq(cpu)->cpu_capacity_orig *
 +             arch_scale_freq_capacity(NULL, cpu)
 +             >> SCHED_CAPACITY_SHIFT;
 +}
 +
 +static inline bool energy_aware(void)
 +{
 +      return sched_feat(ENERGY_AWARE);
 +}
 +
 +struct energy_env {
 +      struct sched_group      *sg_top;
 +      struct sched_group      *sg_cap;
 +      int                     cap_idx;
 +      int                     util_delta;
 +      int                     src_cpu;
 +      int                     dst_cpu;
 +      int                     energy;
 +      int                     payoff;
 +      struct task_struct      *task;
 +      struct {
 +              int before;
 +              int after;
 +              int delta;
 +              int diff;
 +      } nrg;
 +      struct {
 +              int before;
 +              int after;
 +              int delta;
 +      } cap;
 +};
 +
 +/*
 + * __cpu_norm_util() returns the cpu util relative to a specific capacity,
 + * i.e. its busy ratio, in the range [0..SCHED_LOAD_SCALE], which is useful for
 + * energy calculations. Using the scale-invariant util returned by
 + * cpu_util() and approximating scale-invariant util by:
 + *
 + *   util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time
 + *
 + * the normalized util can be found using the specific capacity.
 + *
 + *   capacity = capacity_orig * curr_freq/max_freq
 + *
 + *   norm_util = running_time/time ~ util/capacity
 + */
 +static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
 +{
 +      int util = __cpu_util(cpu, delta);
 +
 +      if (util >= capacity)
 +              return SCHED_CAPACITY_SCALE;
 +
 +      return (util << SCHED_CAPACITY_SHIFT)/capacity;
 +}
 +
 +static int calc_util_delta(struct energy_env *eenv, int cpu)
 +{
 +      if (cpu == eenv->src_cpu)
 +              return -eenv->util_delta;
 +      if (cpu == eenv->dst_cpu)
 +              return eenv->util_delta;
 +      return 0;
 +}
 +
 +static
 +unsigned long group_max_util(struct energy_env *eenv)
 +{
 +      int i, delta;
 +      unsigned long max_util = 0;
 +
 +      for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
 +              delta = calc_util_delta(eenv, i);
 +              max_util = max(max_util, __cpu_util(i, delta));
 +      }
 +
 +      return max_util;
 +}
 +
 +/*
 + * group_norm_util() returns the approximated group util relative to its
 + * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in
 + * energy calculations. Since task executions may or may not overlap in time in
 + * the group the true normalized util is between max(cpu_norm_util(i)) and
 + * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The
 + * latter is used as the estimate as it leads to a more pessimistic energy
 + * estimate (more busy).
 + */
 +static unsigned
 +long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
 +{
 +      int i, delta;
 +      unsigned long util_sum = 0;
 +      unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
 +
 +      for_each_cpu(i, sched_group_cpus(sg)) {
 +              delta = calc_util_delta(eenv, i);
 +              util_sum += __cpu_norm_util(i, capacity, delta);
 +      }
 +
 +      if (util_sum > SCHED_CAPACITY_SCALE)
 +              return SCHED_CAPACITY_SCALE;
 +      return util_sum;
 +}
 +
 +static int find_new_capacity(struct energy_env *eenv,
 +      const struct sched_group_energy const *sge)
 +{
 +      int idx;
 +      unsigned long util = group_max_util(eenv);
 +
 +      for (idx = 0; idx < sge->nr_cap_states; idx++) {
 +              if (sge->cap_states[idx].cap >= util)
 +                      break;
 +      }
 +
 +      eenv->cap_idx = idx;
 +
 +      return idx;
 +}
 +
 +static int group_idle_state(struct sched_group *sg)
 +{
 +      int i, state = INT_MAX;
 +
 +      /* Find the shallowest idle state in the sched group. */
 +      for_each_cpu(i, sched_group_cpus(sg))
 +              state = min(state, idle_get_state_idx(cpu_rq(i)));
 +
 +      /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
 +      state++;
 +
 +      return state;
 +}
 +
 +/*
 + * sched_group_energy(): Computes the absolute energy consumption of cpus
 + * belonging to the sched_group including shared resources shared only by
 + * members of the group. Iterates over all cpus in the hierarchy below the
 + * sched_group starting from the bottom working its way up before going to
 + * the next cpu until all cpus are covered at all levels. The current
 + * implementation is likely to gather the same util statistics multiple times.
 + * This can probably be done in a faster but more complex way.
 + * Note: sched_group_energy() may fail when racing with sched_domain updates.
 + */
 +static int sched_group_energy(struct energy_env *eenv)
 +{
 +      struct sched_domain *sd;
 +      int cpu, total_energy = 0;
 +      struct cpumask visit_cpus;
 +      struct sched_group *sg;
 +
 +      WARN_ON(!eenv->sg_top->sge);
 +
 +      cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
 +
 +      while (!cpumask_empty(&visit_cpus)) {
 +              struct sched_group *sg_shared_cap = NULL;
 +
 +              cpu = cpumask_first(&visit_cpus);
 +              cpumask_clear_cpu(cpu, &visit_cpus);
 +
 +              /*
 +               * Is the group utilization affected by cpus outside this
 +               * sched_group?
 +               */
 +              sd = rcu_dereference(per_cpu(sd_scs, cpu));
 +
 +              if (!sd)
 +                      /*
 +                       * We most probably raced with hotplug; returning a
 +                       * wrong energy estimation is better than entering an
 +                       * infinite loop.
 +                       */
 +                      return -EINVAL;
 +
 +              if (sd->parent)
 +                      sg_shared_cap = sd->parent->groups;
 +
 +              for_each_domain(cpu, sd) {
 +                      sg = sd->groups;
 +
 +                      /* Has this sched_domain already been visited? */
 +                      if (sd->child && group_first_cpu(sg) != cpu)
 +                              break;
 +
 +                      do {
 +                              unsigned long group_util;
 +                              int sg_busy_energy, sg_idle_energy;
 +                              int cap_idx, idle_idx;
 +
 +                              if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
 +                                      eenv->sg_cap = sg_shared_cap;
 +                              else
 +                                      eenv->sg_cap = sg;
 +
 +                              cap_idx = find_new_capacity(eenv, sg->sge);
 +
 +                              if (sg->group_weight == 1) {
 +                                      /* Remove capacity of src CPU (before task move) */
 +                                      if (eenv->util_delta == 0 &&
 +                                          cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) {
 +                                              eenv->cap.before = sg->sge->cap_states[cap_idx].cap;
 +                                              eenv->cap.delta -= eenv->cap.before;
 +                                      }
 +                                      /* Add capacity of dst CPU  (after task move) */
 +                                      if (eenv->util_delta != 0 &&
 +                                          cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) {
 +                                              eenv->cap.after = sg->sge->cap_states[cap_idx].cap;
 +                                              eenv->cap.delta += eenv->cap.after;
 +                                      }
 +                              }
 +
 +                              idle_idx = group_idle_state(sg);
 +                              group_util = group_norm_util(eenv, sg);
 +                              sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
 +                                                              >> SCHED_CAPACITY_SHIFT;
 +                              sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)
 +                                                              * sg->sge->idle_states[idle_idx].power)
 +                                                              >> SCHED_CAPACITY_SHIFT;
 +
 +                              total_energy += sg_busy_energy + sg_idle_energy;
 +
 +                              if (!sd->child) {
 +                                      int i;
 +
 +                                      for_each_cpu(i, sched_group_cpus(sg))
 +                                              cpumask_clear_cpu(i, &visit_cpus);
 +                              }
 +
 +                              if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
 +                                      goto next_cpu;
 +
 +                      } while (sg = sg->next, sg != sd->groups);
 +              }
 +next_cpu:
 +              continue;
 +      }
 +
 +      eenv->energy = total_energy;
 +      return 0;
 +}
 +
 +static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
 +{
 +      return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg));
 +}
 +
 +#ifdef CONFIG_SCHED_TUNE
 +static int energy_diff_evaluate(struct energy_env *eenv)
 +{
 +      unsigned int boost;
 +      int nrg_delta;
 +
 +      /* Return energy diff when boost margin is 0 */
 +#ifdef CONFIG_CGROUP_SCHEDTUNE
 +      boost = schedtune_task_boost(eenv->task);
 +#else
 +      boost = get_sysctl_sched_cfs_boost();
 +#endif
 +      if (boost == 0)
 +              return eenv->nrg.diff;
 +
 +      /* Compute normalized energy diff */
 +      nrg_delta = schedtune_normalize_energy(eenv->nrg.diff);
 +      eenv->nrg.delta = nrg_delta;
 +
 +      eenv->payoff = schedtune_accept_deltas(
 +                      eenv->nrg.delta,
 +                      eenv->cap.delta,
 +                      eenv->task);
 +
 +      /*
 +       * When SchedTune is enabled, the energy_diff() function will return
 +       * the computed energy payoff value. Since callers expect the
 +       * energy_diff() return value to be negative, this evaluation function
 +       * returns a negative value whenever the evaluation yields a positive
 +       * payoff, which is the condition for accepting a scheduling
 +       * decision.
 +       */
 +      return -eenv->payoff;
 +}
 +#else /* CONFIG_SCHED_TUNE */
 +#define energy_diff_evaluate(eenv) eenv->nrg.diff
 +#endif
 +
 +/*
 + * energy_diff(): Estimate the energy impact of changing the utilization
 + * distribution. eenv specifies the change: utilisation amount, source, and
 + * destination cpu. Source or destination cpu may be -1 in which case the
 + * utilization is removed from or added to the system (e.g. task wake-up). If
 + * both are specified, the utilization is migrated.
 + */
 +static int energy_diff(struct energy_env *eenv)
 +{
 +      struct sched_domain *sd;
 +      struct sched_group *sg;
 +      int sd_cpu = -1, energy_before = 0, energy_after = 0;
 +
 +      struct energy_env eenv_before = {
 +              .util_delta     = 0,
 +              .src_cpu        = eenv->src_cpu,
 +              .dst_cpu        = eenv->dst_cpu,
 +              .nrg            = { 0, 0, 0, 0},
 +              .cap            = { 0, 0, 0 },
 +      };
 +
 +      if (eenv->src_cpu == eenv->dst_cpu)
 +              return 0;
 +
 +      sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu;
 +      sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));
 +
 +      if (!sd)
 +              return 0; /* Error */
 +
 +      sg = sd->groups;
 +
 +      do {
 +              if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) {
 +                      eenv_before.sg_top = eenv->sg_top = sg;
 +
 +                      if (sched_group_energy(&eenv_before))
 +                              return 0; /* Invalid result abort */
 +                      energy_before += eenv_before.energy;
 +
 +                      /* Keep track of SRC cpu (before) capacity */
 +                      eenv->cap.before = eenv_before.cap.before;
 +                      eenv->cap.delta = eenv_before.cap.delta;
 +
 +                      if (sched_group_energy(eenv))
 +                              return 0; /* Invalid result abort */
 +                      energy_after += eenv->energy;
 +              }
 +      } while (sg = sg->next, sg != sd->groups);
 +
 +      eenv->nrg.before = energy_before;
 +      eenv->nrg.after = energy_after;
 +      eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before;
 +      eenv->payoff = 0;
 +
 +      return energy_diff_evaluate(eenv);
 +}
 +
  /*
   * Detect M:N waker/wakee relationships via a switching-frequency heuristic.
   * A waker of many should wake a different task than the one last awakened
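
As a worked example of the normalization and energy sums described in the
__cpu_norm_util(), group_norm_util() and sched_group_energy() comments
above, the toy userspace program below uses made-up capacity, power and
utilization figures (nothing here is real platform data) and prints
"group_util=768 busy=300 idle=5 total=305":

    #include <stdio.h>

    #define SCALE 1024  /* stands in for SCHED_CAPACITY_SCALE / SCHED_LOAD_SCALE */

    /* Mirrors __cpu_norm_util(): busy ratio in [0..SCALE] at capacity 'cap'. */
    static unsigned long norm_util(unsigned long util, unsigned long cap)
    {
            return util >= cap ? SCALE : (util * SCALE) / cap;
    }

    int main(void)
    {
            /* One group of two CPUs at a hypothetical cap state:
             * capacity 600, busy power 400, idle power 20. */
            unsigned long cap = 600, busy_power = 400, idle_power = 20;
            unsigned long util0 = 300, util1 = 150;
            unsigned long group_util, busy_energy, idle_energy;

            /* Mirrors group_norm_util(): sum of ratios, clamped at SCALE. */
            group_util = norm_util(util0, cap) + norm_util(util1, cap);
            if (group_util > SCALE)
                    group_util = SCALE;

            /* Mirrors the sg_busy_energy/sg_idle_energy terms above. */
            busy_energy = (group_util * busy_power) / SCALE;
            idle_energy = ((SCALE - group_util) * idle_power) / SCALE;

            printf("group_util=%lu busy=%lu idle=%lu total=%lu\n",
                   group_util, busy_energy, idle_energy,
                   busy_energy + idle_energy);
            return 0;
    }
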
@@@ -5141,157 -4742,6 +5158,157 @@@ static int wake_affine(struct sched_dom
        return 1;
  }
  
 +static inline unsigned long task_util(struct task_struct *p)
 +{
 +      return p->se.avg.util_avg;
 +}
 +
 +unsigned int capacity_margin = 1280; /* ~20% margin */
 +
 +static inline unsigned long boosted_task_util(struct task_struct *task);
 +
 +static inline bool __task_fits(struct task_struct *p, int cpu, int util)
 +{
 +      unsigned long capacity = capacity_of(cpu);
 +
 +      util += boosted_task_util(p);
 +
 +      return (capacity * 1024) > (util * capacity_margin);
 +}
 +
 +static inline bool task_fits_max(struct task_struct *p, int cpu)
 +{
 +      unsigned long capacity = capacity_of(cpu);
 +      unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;
 +
 +      if (capacity == max_capacity)
 +              return true;
 +
 +      if (capacity * capacity_margin > max_capacity * 1024)
 +              return true;
 +
 +      return __task_fits(p, cpu, 0);
 +}
 +
 +static inline bool task_fits_spare(struct task_struct *p, int cpu)
 +{
 +      return __task_fits(p, cpu, cpu_util(cpu));
 +}
 +
 +static bool cpu_overutilized(int cpu)
 +{
 +      return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
 +}
 +
 +#ifdef CONFIG_SCHED_TUNE
 +
 +static unsigned long
 +schedtune_margin(unsigned long signal, unsigned long boost)
 +{
 +      unsigned long long margin = 0;
 +
 +      /*
 +       * Signal proportional compensation (SPC)
 +       *
 +       * The Boost (B) value is used to compute a Margin (M) which is
 +       * proportional to the complement of the original Signal (S):
 +       *   M = B * (SCHED_LOAD_SCALE - S)
 +       * The obtained M could be used by the caller to "boost" S.
 +       */
 +      margin  = SCHED_LOAD_SCALE - signal;
 +      margin *= boost;
 +
 +      /*
 +       * Fast integer division by constant:
 +       *  Constant   :                 (C) = 100
 +       *  Precision  : 0.1%            (P) = 0.1
 +       *  Reference  : C * 100 / P     (R) = 100000
 +       *
 +       * Thus:
 +       *  Shift bits : ceil(log(R,2))  (S) = 17
 +       *  Mult const : round(2^S/C)    (M) = 1311
 +       *
 +       *
 +       */
 +      margin  *= 1311;
 +      margin >>= 17;
 +
 +      return margin;
 +}
 +
 +static inline unsigned int
 +schedtune_cpu_margin(unsigned long util, int cpu)
 +{
 +      unsigned int boost;
 +
 +#ifdef CONFIG_CGROUP_SCHEDTUNE
 +      boost = schedtune_cpu_boost(cpu);
 +#else
 +      boost = get_sysctl_sched_cfs_boost();
 +#endif
 +      if (boost == 0)
 +              return 0;
 +
 +      return schedtune_margin(util, boost);
 +}
 +
 +static inline unsigned long
 +schedtune_task_margin(struct task_struct *task)
 +{
 +      unsigned int boost;
 +      unsigned long util;
 +      unsigned long margin;
 +
 +#ifdef CONFIG_CGROUP_SCHEDTUNE
 +      boost = schedtune_task_boost(task);
 +#else
 +      boost = get_sysctl_sched_cfs_boost();
 +#endif
 +      if (boost == 0)
 +              return 0;
 +
 +      util = task_util(task);
 +      margin = schedtune_margin(util, boost);
 +
 +      return margin;
 +}
 +
 +#else /* CONFIG_SCHED_TUNE */
 +
 +static inline unsigned int
 +schedtune_cpu_margin(unsigned long util, int cpu)
 +{
 +      return 0;
 +}
 +
 +static inline unsigned int
 +schedtune_task_margin(struct task_struct *task)
 +{
 +      return 0;
 +}
 +
 +#endif /* CONFIG_SCHED_TUNE */
 +
 +static inline unsigned long
 +boosted_cpu_util(int cpu)
 +{
 +      unsigned long util = cpu_util(cpu);
 +      unsigned long margin = schedtune_cpu_margin(util, cpu);
 +
 +      trace_sched_boost_cpu(cpu, util, margin);
 +
 +      return util + margin;
 +}
 +
 +static inline unsigned long
 +boosted_task_util(struct task_struct *task)
 +{
 +      unsigned long util = task_util(task);
 +      unsigned long margin = schedtune_task_margin(task);
 +
 +      return util + margin;
 +}
 +
  /*
   * find_idlest_group finds and returns the least busy CPU group within the
   * domain.
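
A quick arithmetic check of the schedtune_margin() constants above
(illustrative only, with made-up inputs): for a signal of 256, roughly 25%
of SCHED_LOAD_SCALE, and a 10% boost, the margin is (1024 - 256) * 10 = 7680
boost-percent units, and the multiply-by-1311, shift-right-by-17 trick
reproduces the integer division by 100 for this input:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long signal = 256;  /* ~25% of SCHED_LOAD_SCALE (1024) */
            unsigned long long boost  = 10;   /* 10% boost */
            unsigned long long margin = (1024 - signal) * boost;  /* 7680 */

            unsigned long long fast  = (margin * 1311) >> 17;  /* 76 */
            unsigned long long exact = margin / 100;           /* 76 */

            printf("fast=%llu exact=%llu\n", fast, exact);
            return 0;
    }
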
@@@ -5301,10 -4751,7 +5318,10 @@@ find_idlest_group(struct sched_domain *
                  int this_cpu, int sd_flag)
  {
        struct sched_group *idlest = NULL, *group = sd->groups;
 +      struct sched_group *fit_group = NULL, *spare_group = NULL;
        unsigned long min_load = ULONG_MAX, this_load = 0;
 +      unsigned long fit_capacity = ULONG_MAX;
 +      unsigned long max_spare_capacity = capacity_margin - SCHED_LOAD_SCALE;
        int load_idx = sd->forkexec_idx;
        int imbalance = 100 + (sd->imbalance_pct-100)/2;
  
                load_idx = sd->wake_idx;
  
        do {
 -              unsigned long load, avg_load;
 +              unsigned long load, avg_load, spare_capacity;
                int local_group;
                int i;
  
                                load = target_load(i, load_idx);
  
                        avg_load += load;
 +
 +                      /*
 +                       * Look for the most energy-efficient group that can
 +                       * fit the task.
 +                       */
 +                      if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) {
 +                              fit_capacity = capacity_of(i);
 +                              fit_group = group;
 +                      }
 +
 +                      /*
 +                       * Look for group which has most spare capacity on a
 +                       * single cpu.
 +                       */
 +                      spare_capacity = capacity_of(i) - cpu_util(i);
 +                      if (spare_capacity > max_spare_capacity) {
 +                              max_spare_capacity = spare_capacity;
 +                              spare_group = group;
 +                      }
                }
  
                /* Adjust by relative CPU capacity of the group */
                }
        } while (group = group->next, group != sd->groups);
  
 +      if (fit_group)
 +              return fit_group;
 +
 +      if (spare_group)
 +              return spare_group;
 +
        if (!idlest || 100*this_load < imbalance*min_load)
                return NULL;
        return idlest;
@@@ -5393,7 -4815,7 +5410,7 @@@ find_idlest_cpu(struct sched_group *gro
  
        /* Traverse only the allowed CPUs */
        for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
 -              if (idle_cpu(i)) {
 +              if (task_fits_spare(p, i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
                        if (idle && idle->exit_latency < min_exit_latency) {
                                min_exit_latency = idle->exit_latency;
                                latest_idle_timestamp = rq->idle_stamp;
                                shallowest_idle_cpu = i;
 -                      } else if ((!idle || idle->exit_latency == min_exit_latency) &&
 +                      } else if (idle_cpu(i) &&
 +                                 (!idle || idle->exit_latency == min_exit_latency) &&
                                   rq->idle_stamp > latest_idle_timestamp) {
                                /*
                                 * If equal or no active idle state, then
                                 */
                                latest_idle_timestamp = rq->idle_stamp;
                                shallowest_idle_cpu = i;
 +                      } else if (shallowest_idle_cpu == -1) {
 +                              /*
 +                               * If we haven't found an idle CPU yet, pick a
 +                               * non-idle one that can fit the task as a
 +                               * fallback.
 +                               */
 +                              shallowest_idle_cpu = i;
                        }
                } else if (shallowest_idle_cpu == -1) {
                        load = weighted_cpuload(i);
@@@ -5480,85 -4894,38 +5497,85 @@@ done
        return target;
  }
  
 -/*
 - * cpu_util returns the amount of capacity of a CPU that is used by CFS
 - * tasks. The unit of the return value must be the one of capacity so we can
 - * compare the utilization with the capacity of the CPU that is available for
 - * CFS task (ie cpu_capacity).
 - *
 - * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
 - * recent utilization of currently non-runnable tasks on a CPU. It represents
 - * the amount of utilization of a CPU in the range [0..capacity_orig] where
 - * capacity_orig is the cpu_capacity available at the highest frequency
 - * (arch_scale_freq_capacity()).
 - * The utilization of a CPU converges towards a sum equal to or less than the
 - * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
 - * the running time on this CPU scaled by capacity_curr.
 - *
 - * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
 - * higher than capacity_orig because of unfortunate rounding in
 - * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
 - * the average stabilizes with the new running time. We need to check that the
 - * utilization stays within the range of [0..capacity_orig] and cap it if
 - * necessary. Without utilization capping, a group could be seen as overloaded
 - * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
 - * available capacity. We allow utilization to overshoot capacity_curr (but not
 - * capacity_orig) as it useful for predicting the capacity required after task
 - * migrations (scheduler-driven DVFS).
 - */
 -static int cpu_util(int cpu)
 +static int energy_aware_wake_cpu(struct task_struct *p, int target)
  {
 -      unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
 -      unsigned long capacity = capacity_orig_of(cpu);
 +      struct sched_domain *sd;
 +      struct sched_group *sg, *sg_target;
 +      int target_max_cap = INT_MAX;
 +      int target_cpu = task_cpu(p);
 +      int i;
 +
 +      sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
 +
 +      if (!sd)
 +              return target;
 +
 +      sg = sd->groups;
 +      sg_target = sg;
 +
 +      /*
 +       * Find group with sufficient capacity. We only get here if no cpu is
 +       * overutilized. We may end up overutilizing a cpu by adding the task,
 +       * but that should not be any worse than select_idle_sibling().
 +       * load_balance() should sort it out later as we get above the tipping
 +       * point.
 +       */
 +      do {
 +              /* Assume all cpus in the group have the same capacity */
 +              int max_cap_cpu = group_first_cpu(sg);
 +
 +              /*
 +               * Assume smaller max capacity means more energy-efficient.
 +               * Ideally we would query the energy model for the right
 +               * answer, but that easily turns into an exhaustive search.
 +               */
 +              if (capacity_of(max_cap_cpu) < target_max_cap &&
 +                  task_fits_max(p, max_cap_cpu)) {
 +                      sg_target = sg;
 +                      target_max_cap = capacity_of(max_cap_cpu);
 +              }
 +      } while (sg = sg->next, sg != sd->groups);
 +
 +      /* Find cpu with sufficient capacity */
 +      for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
 +              /*
 +               * p's blocked utilization is still accounted for on prev_cpu
 +               * so prev_cpu will receive a negative bias due to the double
 +               * accounting. However, the blocked utilization may be zero.
 +               */
 +              int new_util = cpu_util(i) + boosted_task_util(p);
 +
 +              if (new_util > capacity_orig_of(i))
 +                      continue;
  
 -      return (util >= capacity) ? capacity : util;
 +              if (new_util < capacity_curr_of(i)) {
 +                      target_cpu = i;
 +                      if (cpu_rq(i)->nr_running)
 +                              break;
 +              }
 +
 +              /* cpu has capacity at higher OPP, keep it as fallback */
 +              if (target_cpu == task_cpu(p))
 +                      target_cpu = i;
 +      }
 +
 +      if (target_cpu != task_cpu(p)) {
 +              struct energy_env eenv = {
 +                      .util_delta     = task_util(p),
 +                      .src_cpu        = task_cpu(p),
 +                      .dst_cpu        = target_cpu,
 +                      .task           = p,
 +              };
 +
 +              /* Not enough spare capacity on previous cpu */
 +              if (cpu_overutilized(task_cpu(p)))
 +                      return target_cpu;
 +
 +              if (energy_diff(&eenv) >= 0)
 +                      return task_cpu(p);
 +      }
 +
 +      return target_cpu;
  }
  
  /*
@@@ -5583,9 -4950,7 +5600,9 @@@ select_task_rq_fair(struct task_struct 
        int sync = wake_flags & WF_SYNC;
  
        if (sd_flag & SD_BALANCE_WAKE)
 -              want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
 +              want_affine = (!wake_wide(p) && task_fits_max(p, cpu) &&
 +                            cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) ||
 +                            energy_aware();
  
        rcu_read_lock();
        for_each_domain(cpu, tmp) {
        }
  
        if (!sd) {
 -              if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 +              if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
 +                      new_cpu = energy_aware_wake_cpu(p, prev_cpu);
 +              else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
                        new_cpu = select_idle_sibling(p, new_cpu);
  
        } else while (sd) {
@@@ -5933,8 -5296,6 +5950,8 @@@ again
        if (hrtick_enabled(rq))
                hrtick_start_fair(rq, p);
  
 +      rq->misfit_task = !task_fits_max(p, rq->cpu);
 +
        return p;
  simple:
        cfs_rq = &rq->cfs;
        if (hrtick_enabled(rq))
                hrtick_start_fair(rq, p);
  
 +      rq->misfit_task = !task_fits_max(p, rq->cpu);
 +
        return p;
  
  idle:
 +      rq->misfit_task = 0;
        /*
         * This is OK, because current is on_cpu, which avoids it being picked
         * for load-balance and preemption/IRQs are still disabled avoiding
@@@ -6174,13 -5532,6 +6191,13 @@@ static unsigned long __read_mostly max_
  
  enum fbq_type { regular, remote, all };
  
 +enum group_type {
 +      group_other = 0,
 +      group_misfit_task,
 +      group_imbalanced,
 +      group_overloaded,
 +};
 +
  #define LBF_ALL_PINNED        0x01
  #define LBF_NEED_BREAK        0x02
  #define LBF_DST_PINNED  0x04
@@@ -6199,7 -5550,6 +6216,7 @@@ struct lb_env 
        int                     new_dst_cpu;
        enum cpu_idle_type      idle;
        long                    imbalance;
 +      unsigned int            src_grp_nr_running;
        /* The set of CPUs under consideration for load-balancing */
        struct cpumask          *cpus;
  
        unsigned int            loop_max;
  
        enum fbq_type           fbq_type;
 +      enum group_type         busiest_group_type;
        struct list_head        tasks;
  };
  
@@@ -6537,10 -5886,6 +6554,10 @@@ static void attach_one_task(struct rq *
  {
        raw_spin_lock(&rq->lock);
        attach_task(rq, p);
 +      /*
 +       * We want to potentially raise target_cpu's OPP.
 +       */
 +      update_capacity_of(cpu_of(rq));
        raw_spin_unlock(&rq->lock);
  }
  
@@@ -6562,11 -5907,6 +6579,11 @@@ static void attach_tasks(struct lb_env 
                attach_task(env->dst_rq, p);
        }
  
 +      /*
 +       * We want to potentially raise env.dst_cpu's OPP.
 +       */
 +      update_capacity_of(env->dst_cpu);
 +
        raw_spin_unlock(&env->dst_rq->lock);
  }
  
@@@ -6662,6 -6002,12 +6679,6 @@@ static unsigned long task_h_load(struc
  
  /********** Helpers for find_busiest_group ************************/
  
 -enum group_type {
 -      group_other = 0,
 -      group_imbalanced,
 -      group_overloaded,
 -};
 -
  /*
   * sg_lb_stats - stats of a sched_group required for load_balancing
   */
@@@ -6677,7 -6023,6 +6694,7 @@@ struct sg_lb_stats 
        unsigned int group_weight;
        enum group_type group_type;
        int group_no_capacity;
 +      int group_misfit_task; /* A cpu has a task too big for its capacity */
  #ifdef CONFIG_NUMA_BALANCING
        unsigned int nr_numa_running;
        unsigned int nr_preferred_running;
@@@ -6769,57 -6114,19 +6786,57 @@@ static unsigned long scale_rt_capacity(
  
        used = div_u64(avg, total);
  
 +      /*
 +       * Deadline bandwidth is defined at the system level, so we must
 +       * weight this bandwidth with the max capacity of the system.
 +       * As a reminder, avg_bw is 20 bits wide and
 +       * scale_cpu_capacity is 10 bits wide.
 +       */
 +      used += div_u64(rq->dl.avg_bw, arch_scale_cpu_capacity(NULL, cpu));
 +
        if (likely(used < SCHED_CAPACITY_SCALE))
                return SCHED_CAPACITY_SCALE - used;
  
        return 1;
  }
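The new term folds the deadline bandwidth into the rt-scaling of CPU capacity. Going by the comment above (avg_bw on a 20-bit scale, cpu capacity on a 10-bit scale), the division yields a value on the same 10-bit scale as SCHED_CAPACITY_SCALE. A worked example of that arithmetic; the exact fixed-point layout of dl.avg_bw is assumed from the comment, not from code in this hunk:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024ULL	/* 10-bit capacity scale */

int main(void)
{
	/* Assumed: a deadline reservation of 10% expressed on a 20-bit scale. */
	unsigned long long avg_bw = (1ULL << 20) / 10;		/* ~104857 */
	unsigned long long cpu_capacity = SCHED_CAPACITY_SCALE;	/* CPU at full capacity */

	/* Mirrors used += div_u64(rq->dl.avg_bw, arch_scale_cpu_capacity(...)) */
	unsigned long long used = avg_bw / cpu_capacity;	/* ~102, i.e. ~10% of 1024 */

	printf("dl contribution to 'used': %llu / %llu\n", used, SCHED_CAPACITY_SCALE);
	return 0;
}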
  
 +void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
 +{
 +      raw_spin_lock_init(&mcc->lock);
 +      mcc->val = 0;
 +      mcc->cpu = -1;
 +}
 +
  static void update_cpu_capacity(struct sched_domain *sd, int cpu)
  {
        unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
        struct sched_group *sdg = sd->groups;
 +      struct max_cpu_capacity *mcc;
 +      unsigned long max_capacity;
 +      int max_cap_cpu;
 +      unsigned long flags;
  
        cpu_rq(cpu)->cpu_capacity_orig = capacity;
  
 +      mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
 +
 +      raw_spin_lock_irqsave(&mcc->lock, flags);
 +      max_capacity = mcc->val;
 +      max_cap_cpu = mcc->cpu;
 +
 +      if ((max_capacity > capacity && max_cap_cpu == cpu) ||
 +          (max_capacity < capacity)) {
 +              mcc->val = capacity;
 +              mcc->cpu = cpu;
 +#ifdef CONFIG_SCHED_DEBUG
 +              raw_spin_unlock_irqrestore(&mcc->lock, flags);
 +              //pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
 +              goto skip_unlock;
 +#endif
 +      }
 +      raw_spin_unlock_irqrestore(&mcc->lock, flags);
 +
 +skip_unlock: __attribute__ ((unused));
        capacity *= scale_rt_capacity(cpu);
        capacity >>= SCHED_CAPACITY_SHIFT;
  
  
        cpu_rq(cpu)->cpu_capacity = capacity;
        sdg->sgc->capacity = capacity;
 +      sdg->sgc->max_capacity = capacity;
  }
  
  void update_group_capacity(struct sched_domain *sd, int cpu)
  {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
 -      unsigned long capacity;
 +      unsigned long capacity, max_capacity;
        unsigned long interval;
  
        interval = msecs_to_jiffies(sd->balance_interval);
        }
  
        capacity = 0;
 +      max_capacity = 0;
  
        if (child->flags & SD_OVERLAP) {
                /*
                         */
                        if (unlikely(!rq->sd)) {
                                capacity += capacity_of(cpu);
 -                              continue;
 +                      } else {
 +                              sgc = rq->sd->groups->sgc;
 +                              capacity += sgc->capacity;
                        }
  
 -                      sgc = rq->sd->groups->sgc;
 -                      capacity += sgc->capacity;
 +                      max_capacity = max(capacity, max_capacity);
                }
        } else  {
                /*
  
                group = child->groups;
                do {
 -                      capacity += group->sgc->capacity;
 +                      struct sched_group_capacity *sgc = group->sgc;
 +
 +                      capacity += sgc->capacity;
 +                      max_capacity = max(sgc->max_capacity, max_capacity);
                        group = group->next;
                } while (group != child->groups);
        }
  
        sdg->sgc->capacity = capacity;
 +      sdg->sgc->max_capacity = max_capacity;
  }
  
  /*
@@@ -6992,18 -6292,6 +7009,18 @@@ group_is_overloaded(struct lb_env *env
        return false;
  }
  
 +
 +/*
 + * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
 + * per-cpu capacity than sched_group ref.
 + */
 +static inline bool
 +group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 +{
 +      return sg->sgc->max_capacity + capacity_margin - SCHED_LOAD_SCALE <
 +                                                      ref->sgc->max_capacity;
 +}
 +
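group_smaller_cpu_capacity() only treats sg as smaller when its max per-cpu capacity trails ref's by more than the capacity_margin headroom, so near-equal groups do not flip the decision back and forth. A small model of that comparison, again assuming capacity_margin = 1280 (the value is defined elsewhere in fair.c):

#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL
#define CAPACITY_MARGIN  1280UL	/* assumed value of capacity_margin */

/* Model of group_smaller_cpu_capacity(): true only when sg's max per-cpu
 * capacity is below ref's by more than the margin headroom (256 here). */
static int smaller_cpu_capacity(unsigned long sg_max, unsigned long ref_max)
{
	return sg_max + CAPACITY_MARGIN - SCHED_LOAD_SCALE < ref_max;
}

int main(void)
{
	printf("%d\n", smaller_cpu_capacity(600, 1024));	/* 600 + 256 < 1024 -> 1 */
	printf("%d\n", smaller_cpu_capacity(900, 1024));	/* 900 + 256 < 1024 -> 0 */
	return 0;
}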
  static inline enum
  group_type group_classify(struct sched_group *group,
                          struct sg_lb_stats *sgs)
        if (sg_imbalanced(group))
                return group_imbalanced;
  
 +      if (sgs->group_misfit_task)
 +              return group_misfit_task;
 +
        return group_other;
  }
  
   * @local_group: Does group contain this_cpu.
   * @sgs: variable to hold the statistics for this group.
   * @overload: Indicate more than one runnable task for any CPU.
 + * @overutilized: Indicate overutilization for any CPU.
   */
  static inline void update_sg_lb_stats(struct lb_env *env,
                        struct sched_group *group, int load_idx,
                        int local_group, struct sg_lb_stats *sgs,
 -                      bool *overload)
 +                      bool *overload, bool *overutilized)
  {
        unsigned long load;
        int i;
                sgs->sum_weighted_load += weighted_cpuload(i);
                if (idle_cpu(i))
                        sgs->idle_cpus++;
 +
 +              if (cpu_overutilized(i)) {
 +                      *overutilized = true;
 +                      if (!sgs->group_misfit_task && rq->misfit_task)
 +                              sgs->group_misfit_task = capacity_of(i);
 +              }
        }
  
        /* Adjust by relative CPU capacity of the group */
@@@ -7110,25 -6388,9 +7127,25 @@@ static bool update_sd_pick_busiest(stru
        if (sgs->group_type < busiest->group_type)
                return false;
  
 +      /*
 +       * Candidate sg doesn't face any serious load-balance problems
 +       * so don't pick it if the local sg is already filled up.
 +       */
 +      if (sgs->group_type == group_other &&
 +          !group_has_capacity(env, &sds->local_stat))
 +              return false;
 +
        if (sgs->avg_load <= busiest->avg_load)
                return false;
  
 +      /*
 +       * Candidate sg has no more than one task per cpu and has higher
 +       * per-cpu capacity. No reason to pull tasks to less capable cpus.
 +       */
 +      if (sgs->sum_nr_running <= sgs->group_weight &&
 +          group_smaller_cpu_capacity(sds->local, sg))
 +              return false;
 +
        /* This is the busiest node in its class. */
        if (!(env->sd->flags & SD_ASYM_PACKING))
                return true;
@@@ -7190,7 -6452,7 +7207,7 @@@ static inline void update_sd_lb_stats(s
        struct sched_group *sg = env->sd->groups;
        struct sg_lb_stats tmp_sgs;
        int load_idx, prefer_sibling = 0;
 -      bool overload = false;
 +      bool overload = false, overutilized = false;
  
        if (child && child->flags & SD_PREFER_SIBLING)
                prefer_sibling = 1;
                }
  
                update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
 -                                              &overload);
 +                                              &overload, &overutilized);
  
                if (local_group)
                        goto next_group;
                        sgs->group_type = group_classify(sg, sgs);
                }
  
 +              /*
 +               * Ignore groups with misfit tasks if the local group has no
 +               * spare capacity or if its per-cpu capacity isn't higher
 +               * than the candidate's.
 +               */
 +              if (sgs->group_type == group_misfit_task &&
 +                  (!group_has_capacity(env, &sds->local_stat) ||
 +                   !group_smaller_cpu_capacity(sg, sds->local)))
 +                      sgs->group_type = group_other;
 +
                if (update_sd_pick_busiest(env, sds, sg, sgs)) {
                        sds->busiest = sg;
                        sds->busiest_stat = *sgs;
@@@ -7259,20 -6512,12 +7276,20 @@@ next_group
        if (env->sd->flags & SD_NUMA)
                env->fbq_type = fbq_classify_group(&sds->busiest_stat);
  
 +      env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
 +
        if (!env->sd->parent) {
                /* update overload indicator if we are at root domain */
                if (env->dst_rq->rd->overload != overload)
                        env->dst_rq->rd->overload = overload;
 -      }
  
 +              /* Update over-utilization (tipping point, U >= 0) indicator */
 +              if (env->dst_rq->rd->overutilized != overutilized)
 +                      env->dst_rq->rd->overutilized = overutilized;
 +      } else {
 +              if (!env->dst_rq->rd->overutilized && overutilized)
 +                      env->dst_rq->rd->overutilized = true;
 +      }
  }
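The root-domain overutilized flag set here is the "tipping point" that gates the energy-aware paths (see the energy_aware() checks below). cpu_overutilized() is defined elsewhere in this file; the sketch below models the commonly used rule, where a CPU tips over once its utilization eats into the capacity_margin headroom. The helper shape and the margin value are assumptions, not part of this hunk:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL
#define CAPACITY_MARGIN      1280UL	/* assumed, ~25% headroom */

/* Assumed shape of cpu_overutilized(): compare utilization against
 * capacity while reserving capacity_margin worth of headroom. */
static int overutilized(unsigned long util, unsigned long capacity)
{
	return capacity * SCHED_CAPACITY_SCALE < util * CAPACITY_MARGIN;
}

int main(void)
{
	/* With these numbers a 1024-capacity CPU tips over above ~819 (80%). */
	printf("%d %d\n", overutilized(800, 1024), overutilized(850, 1024));
	return 0;
}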
  
  /**
@@@ -7419,22 -6664,6 +7436,22 @@@ static inline void calculate_imbalance(
         */
        if (busiest->avg_load <= sds->avg_load ||
            local->avg_load >= sds->avg_load) {
 +              /* Misfitting tasks should be migrated in any case */
 +              if (busiest->group_type == group_misfit_task) {
 +                      env->imbalance = busiest->group_misfit_task;
 +                      return;
 +              }
 +
 +              /*
 +               * Busiest group is overloaded, local is not, use the spare
 +               * cycles to maximize throughput
 +               */
 +              if (busiest->group_type == group_overloaded &&
 +                  local->group_type <= group_misfit_task) {
 +                      env->imbalance = busiest->load_per_task;
 +                      return;
 +              }
 +
                env->imbalance = 0;
                return fix_small_imbalance(env, sds);
        }
                (sds->avg_load - local->avg_load) * local->group_capacity
        ) / SCHED_CAPACITY_SCALE;
  
 +      /* Boost imbalance to allow misfit task to be balanced. */
 +      if (busiest->group_type == group_misfit_task)
 +              env->imbalance = max_t(long, env->imbalance,
 +                                   busiest->group_misfit_task);
 +
        /*
         * if *imbalance is less than the average load per runnable task
         * there is no guarantee that any tasks will be moved so we'll have
@@@ -7514,10 -6738,6 +7531,10 @@@ static struct sched_group *find_busiest
         * this level.
         */
        update_sd_lb_stats(env, &sds);
 +
 +      if (energy_aware() && !env->dst_rq->rd->overutilized)
 +              goto out_balanced;
 +
        local = &sds.local_stat;
        busiest = &sds.busiest_stat;
  
            busiest->group_no_capacity)
                goto force_balance;
  
 +      /* Misfitting tasks should be dealt with regardless of the avg load */
 +      if (busiest->group_type == group_misfit_task)
 +              goto force_balance;
 +
        /*
         * If the local group is busier than the selected busiest group
         * don't try and pull any tasks.
                 * might end up to just move the imbalance on another group
                 */
                if ((busiest->group_type != group_overloaded) &&
 -                              (local->idle_cpus <= (busiest->idle_cpus + 1)))
 +                  (local->idle_cpus <= (busiest->idle_cpus + 1)) &&
 +                  !group_smaller_cpu_capacity(sds.busiest, sds.local))
                        goto out_balanced;
        } else {
                /*
        }
  
  force_balance:
 +      env->busiest_group_type = busiest->group_type;
        /* Looks like there is an imbalance. Compute it */
        calculate_imbalance(env, &sds);
        return sds.busiest;
@@@ -7647,8 -6860,7 +7664,8 @@@ static struct rq *find_busiest_queue(st
                 */
  
                if (rq->nr_running == 1 && wl > env->imbalance &&
 -                  !check_cpu_capacity(rq, env->sd))
 +                  !check_cpu_capacity(rq, env->sd) &&
 +                  env->busiest_group_type != group_misfit_task)
                        continue;
  
                /*
@@@ -7709,13 -6921,6 +7726,13 @@@ static int need_active_balance(struct l
                        return 1;
        }
  
 +      if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
 +          env->src_rq->cfs.h_nr_running == 1 &&
 +          cpu_overutilized(env->src_cpu) &&
 +          !cpu_overutilized(env->dst_cpu))
 +              return 1;
 +
        return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
  }
  
@@@ -7837,11 -7042,6 +7854,11 @@@ more_balance
                 * ld_moved     - cumulative load moved across iterations
                 */
                cur_ld_moved = detach_tasks(&env);
 +              /*
 +               * We want to potentially lower env.src_cpu's OPP.
 +               */
 +              if (cur_ld_moved)
 +                      update_capacity_of(env.src_cpu);
  
                /*
                 * We've detached some tasks from busiest_rq. Every
                 * excessive cache_hot migrations and active balances.
                 */
                if (idle != CPU_NEWLY_IDLE)
 -                      sd->nr_balance_failed++;
 +                      if (env.src_grp_nr_running > 1)
 +                              sd->nr_balance_failed++;
  
                if (need_active_balance(&env)) {
                        raw_spin_lock_irqsave(&busiest->lock, flags);
@@@ -8075,9 -7274,8 +8092,9 @@@ static int idle_balance(struct rq *this
         */
        this_rq->idle_stamp = rq_clock(this_rq);
  
 -      if (this_rq->avg_idle < sysctl_sched_migration_cost ||
 -          !this_rq->rd->overload) {
 +      if (!energy_aware() &&
 +          (this_rq->avg_idle < sysctl_sched_migration_cost ||
 +           !this_rq->rd->overload)) {
                rcu_read_lock();
                sd = rcu_dereference_check_sched_domain(this_rq->sd);
                if (sd)
@@@ -8212,13 -7410,8 +8229,13 @@@ static int active_load_balance_cpu_stop
                schedstat_inc(sd, alb_count);
  
                p = detach_one_task(&env);
 -              if (p)
 +              if (p) {
                        schedstat_inc(sd, alb_pushed);
 +                      /*
 +                       * We want to potentially lower env.src_cpu's OPP.
 +                       */
 +                      update_capacity_of(env.src_cpu);
 +              }
                else
                        schedstat_inc(sd, alb_failed);
        }
@@@ -8598,13 -7791,12 +8615,13 @@@ static inline bool nohz_kick_needed(str
        if (time_before(now, nohz.next_balance))
                return false;
  
 -      if (rq->nr_running >= 2)
 +      if (rq->nr_running >= 2 &&
 +          (!energy_aware() || cpu_overutilized(cpu)))
                return true;
  
        rcu_read_lock();
        sd = rcu_dereference(per_cpu(sd_busy, cpu));
 -      if (sd) {
 +      if (sd && !energy_aware()) {
                sgc = sd->groups->sgc;
                nr_busy = atomic_read(&sgc->nr_busy_cpus);
  
@@@ -8710,11 -7902,6 +8727,11 @@@ static void task_tick_fair(struct rq *r
  
        if (static_branch_unlikely(&sched_numa_balancing))
                task_tick_numa(rq, curr);
 +
 +      if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
 +              rq->rd->overutilized = true;
 +
 +      rq->misfit_task = !task_fits_max(curr, rq->cpu);
  }
  
  /*