libceph: add support for primary_temp mappings
[firefly-linux-kernel-4.4.55.git] / net / ceph / osdmap.c
index b8bbef0580192e8c1027f48c0b309f4e82daf602..20a38a37794cedd554c23f70ca97e02748e3d4a1 100644 (file)
@@ -1597,24 +1597,110 @@ static int raw_to_up_osds(struct ceph_osdmap *osdmap,
 }
 
 /*
- * Return acting set for given pgid.
+ * Given up set, apply pg_temp and primary_temp mappings.
+ *
+ * Return acting set length.  *primary is set to acting primary osd id,
+ * or -1 if acting set is empty.
+ */
+static int apply_temps(struct ceph_osdmap *osdmap,
+                      struct ceph_pg_pool_info *pool, struct ceph_pg pgid,
+                      int *osds, int len, int *primary)
+{
+       struct ceph_pg_mapping *pg;
+       int temp_len;
+       int temp_primary;
+       int i;
+
+       /* raw_pg -> pg */
+       pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
+                                   pool->pg_num_mask);
+
+       /* pg_temp? */
+       pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
+       if (pg) {
+               temp_len = 0;
+               temp_primary = -1;
+
+               for (i = 0; i < pg->pg_temp.len; i++) {
+                       if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
+                               if (ceph_can_shift_osds(pool))
+                                       continue;
+                               else
+                                       osds[temp_len++] = CRUSH_ITEM_NONE;
+                       } else {
+                               osds[temp_len++] = pg->pg_temp.osds[i];
+                       }
+               }
+
+               /* apply pg_temp's primary */
+               for (i = 0; i < temp_len; i++) {
+                       if (osds[i] != CRUSH_ITEM_NONE) {
+                               temp_primary = osds[i];
+                               break;
+                       }
+               }
+       } else {
+               temp_len = len;
+               temp_primary = *primary;
+       }
+
+       /* primary_temp? */
+       pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
+       if (pg)
+               temp_primary = pg->primary_temp.osd;
+
+       *primary = temp_primary;
+       return temp_len;
+}
+
+/*
+ * Calculate acting set for given pgid.
+ *
+ * Return acting set length, or error.  *primary is set to acting
+ * primary osd id, or -1 if acting set is empty or on error.
  */
 int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
-                       int *acting)
+                       int *osds, int *primary)
 {
-       int rawosds[CEPH_PG_MAX_SIZE], *osds;
-       int i, o, num = CEPH_PG_MAX_SIZE;
+       struct ceph_pg_pool_info *pool;
+       u32 pps;
+       int len;
 
-       osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
-       if (!osds)
-               return -1;
+       pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
+       if (!pool) {
+               *primary = -1;
+               return -ENOENT;
+       }
 
-       /* primary is first up osd */
-       o = 0;
-       for (i = 0; i < num; i++)
-               if (ceph_osd_is_up(osdmap, osds[i]))
-                       acting[o++] = osds[i];
-       return o;
+       if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
+               /* hash pool id and seed so that pool PGs do not overlap */
+               pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
+                                    ceph_stable_mod(pgid.seed, pool->pgp_num,
+                                                    pool->pgp_num_mask),
+                                    pgid.pool);
+       } else {
+               /*
+                * legacy behavior: add ps and pool together.  this is
+                * not a great approach because the PGs from each pool
+                * will overlap on top of each other: 0.5 == 1.4 ==
+                * 2.3 == ...
+                */
+               pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
+                                     pool->pgp_num_mask) +
+                       (unsigned)pgid.pool;
+       }
+
+       len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds);
+       if (len < 0) {
+               *primary = -1;
+               return len;
+       }
+
+       len = raw_to_up_osds(osdmap, pool, osds, len, primary);
+
+       len = apply_temps(osdmap, pool, pgid, osds, len, primary);
+
+       return len;
 }
 
 /*