silo/benchmarks/results/make_graphs-3.py

   1 #!/usr/bin/env python
   2
   3 import matplotlib
   4 import pylab as plt
   5 import numpy as np
   6
   7 import os
   8 import sys
   9 import math
  10
  11 if __name__ == '__main__':
  12   files = sys.argv[1:]
  13   for f in files:
  14     execfile(f)
  15
  16     #names = ['scale', 'multipart:pct', 'multipart:cpu']
  17
  18     def deal_with_posK_res(k, x):
  19       if type(x) == list:
  20         return [e[k] for e in x]
  21       return x[k]
  22
  23     def deal_with_pos0_res(x):
  24       return deal_with_posK_res(0, x)
  25
  26     def deal_with_pos1_res(x):
  27       return deal_with_posK_res(1, x)
  28
  29     import re
  30     RGX = re.compile(r'--new-order-remote-item-pct (\d+)')
  31     def extract_pct(x):
  32       m = RGX.search(x)
  33       assert m
  34       p = int(m.group(1))
  35       assert p >= 0 and p <= 100
  36       def pn(n, p):
  37         return 1.0 - (1.0 - p)**n
  38       def ex(p):
  39         import math
  40         return math.fsum([(1.0/11.0)*pn(float(n), p) for n in range(5, 16)])
  41       return ex(p/100.0) * 100.0
  42
  43     def extract_p(x):
  44       m = RGX.search(x)
  45       assert m
  46       p = int(m.group(1))
  47       assert p >= 0 and p <= 100
  48       return p
  49
  50     def multipart_cpu_process(config):
  51       assert config['db'] == 'ndb-proto2' or \
  52              config['db'] == 'kvdb'
  53       if config['db'] == 'ndb-proto2':
  54         return config['threads']
  55       else:
  56         return 8
  57
  58     def readonly_lines_func(config):
  59       if 'disable-read-only-snapshots' in config['bench_opts']:
  60         return 'No-Snapshots'
  61       else:
  62         return 'Snapshots'
  63
  64     def MFormatter(x, p):
  65       if x == 0:
  66         return '0'
  67       v = float(x)/float(10**6)
  68       if math.ceil(v) == v:
  69         return '%dM' % v
  70       return '%.1fM' % v
  71
  72     def KFormatter(x, p):
  73       if x == 0:
  74         return '0'
  75       v = float(x)/float(10**3)
  76       if math.ceil(v) == v:
  77         return '%dK' % v
  78       return '%.1fK' % v
  79
  80     descs = [
  81       {
  82         'name' : 'scale',
  83         'x-axis' : 'threads',
  84         'x-axis-func' : lambda x: x,
  85         'y-axis' : deal_with_pos0_res,
  86         'lines' : ['db'], # each line holds this constant
  87         'x-label' : 'threads',
  88         'y-label' : 'throughput (txns/sec)',
  89         'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(MFormatter),
  90         'x-axis-set-major-locator' : True,
  91         #'title' : 'ycsb throughput graph',
  92       },
  93       {
  94         'name' : 'scale_tpcc',
  95         'x-axis' : 'threads',
  96         'x-axis-func' : lambda x: x,
  97         'y-axis' : deal_with_pos0_res,
  98         'lines' : ['db'], # each line holds this constant
  99         'x-label' : 'threads',
 100         'y-label' : 'throughput (txns/sec)',
 101         'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(MFormatter),
 102         'x-axis-set-major-locator' : True,
 103         #'title' : 'tpcc throughput graph',
 104       },
 105       {
 106         'name' : 'multipart:pct',
 107         'x-axis' : 'bench_opts',
 108         'x-axis-func' : extract_pct,
 109         'y-axis' : deal_with_pos0_res,
 110         'lines' : ['db'], # each line holds this constant
 111         'x-label' : '% cross-partition',
 112         'y-label' : 'throughput (txns/sec)',
 113         'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(MFormatter),
 114         'x-axis-set-major-locator' : False,
 115         #'title' : 'tpcc new-order throughput graph',
 116         'legend' : 'upper right',
 117       },
 118       #{
 119       #  'name' : 'multipart:cpu',
 120       #  'x-axis-process' : multipart_cpu_process,
 121       #  'y-axis' : deal_with_pos0_res,
 122       #  'lines' : ['db'], # each line holds this constant
 123       #  'x-label' : 'num threads',
 124       #  'y-label' : 'txns/sec',
 125       #  'title' : 'tpcc full workload throughput graph',
 126       #},
 127       {
 128         'name' : 'readonly',
 129         'x-axis' : 'bench_opts',
 130         'x-axis-func' : extract_p,
 131         'y-axis' : deal_with_pos0_res,
 132         'lines-func' : readonly_lines_func,
 133         'x-label' : '% remote warehouse stock',
 134         'y-label' : 'throughput (txns/sec)',
 135         'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
 136         'x-axis-set-major-locator' : True,
 137         #'title' : 'tpcc read only throughput graph',
 138         'legend' : 'right',
 139       },
 140     ]
 141
 142     def label_transform(x):
 143       if x == 'kvdb':
 144         return 'Key-Value'
 145       if x == 'ndb-proto1':
 146         return 'Malflingo-Star'
 147       if x == 'ndb-proto2':
 148         return 'Malflingo'
 149       if x == 'kvdb-st':
 150         return 'Partitioned-Store'
 151       return x
 152
 153     for desc in descs:
 154       bench = desc['name']
 155       bench_results = [d for d in RESULTS if d[0]['name'] == bench]
 156       if not bench_results:
 157         print >>sys.stderr, 'skipping bench %s' % bench
 158         continue
 159       lines = {}
 160       for (config, result) in bench_results:
 161         if 'lines-func' in desc:
 162           key = desc['lines-func'](config)
 163         else:
 164           key = tuple(config[x] for x in desc['lines'])
 165         pts = lines.get(key, {})
 166         if 'x-axis-process' in desc:
 167           xpt = desc['x-axis-process'](config)
 168         else:
 169           xpt = desc['x-axis-func'](config[desc['x-axis']])
 170         assert xpt not in pts
 171         pts[xpt] = desc['y-axis'](result)
 172         lines[key] = pts
 173
 174       def mean(x): return sum(x)/len(x)
 175       def median(x): return sorted(x)[len(x)/2]
 176
 177       # find min/max of xpts
 178       xmin = min([e for l in lines.values() for e in l])
 179       xmax = max([e for l in lines.values() for e in l])
 180       #print xmin, xmax
 181
 182       labels = []
 183       for (name, pts) in lines.iteritems():
 184         spts = sorted(pts.iteritems(), key=lambda x: x[0])
 185         ypts = [sorted(x[1]) for x in spts]
 186         ymins = np.array([min(x) for x in ypts])
 187         ymaxs = np.array([max(x) for x in ypts])
 188         ymid = np.array([median(x) for x in ypts])
 189         yerr=np.array([ymid - ymins, ymaxs - ymid])
 190         xpts = [x[0] for x in spts]
 191         assert len(xpts)
 192         if len(xpts) == 1:
 193           xpts = range(xmin, xmax + 1)
 194           assert len(ymins) == 1
 195           assert len(ymaxs) == 1
 196           assert len(ymid) == 1
 197           ymins = np.array([ymins[0] for _ in xpts])
 198           ymaxs = np.array([ymaxs[0] for _ in xpts])
 199           ymid = np.array([ymid[0] for _ in xpts])
 200           yerr=np.array([ymid - ymins, ymaxs - ymid])
 201
 202         plt.errorbar(xpts, ymid, yerr=yerr)
 203         if type(name) == str:
 204           labels.append(label_transform(name))
 205         else:
 206           labels.append(label_transform('-'.join(name)))
 207
 208       ax = plt.gca()
 209       if desc['x-axis-set-major-locator']:
 210         ax.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(sorted(lines.values()[0].keys())))
 211       if 'y-axis-major-formatter' in desc:
 212         ax.yaxis.set_major_formatter(desc['y-axis-major-formatter'])
 213
 214       plt.xlabel(desc['x-label'])
 215       plt.ylabel(desc['y-label'])
 216       if 'title' in desc:
 217         plt.title(desc['title'])
 218
 219       plt.xlim(xmin = xmin, xmax = xmax)
 220       plt.ylim(ymin = 0)
 221
 222       placement = 'upper left' if not 'legend' in desc else desc['legend']
 223       plt.legend(labels, loc=placement)
 224       bname = '.'.join(os.path.basename(f).split('.')[:-1])
 225       plt.savefig('.'.join([bname + '-' + bench, 'pdf']))
 226       plt.close()