11 import itertools as it
13 # XXX: import from runner.py
14 PERSIST_REAL='persist-real'
15 PERSIST_TEMP='persist-temp'
16 PERSIST_NONE='persist-none'
18 NEW_ORDER_RGX = re.compile(r'--new-order-remote-item-pct (\d+)')
19 def extract_raw_pct(x):
20 x = x[0]['bench_opts']
21 m = NEW_ORDER_RGX.search(x)
24 assert p >= 0 and p <= 100
28 p = extract_raw_pct(x)
30 return 1.0 - (1.0 - p)**n
33 return math.fsum([(1.0/11.0)*pn(float(n), p) for n in range(5, 16)])
34 return ex(p/100.0) * 100.0
def mean(x):
    """Return sum(x) divided by len(x).

    The division uses the plain `/` operator, so the result type follows
    the operand types. An empty sequence raises ZeroDivisionError.
    """
    total = sum(x)
    count = len(x)
    return total / count
def median(x):
    """Return the middle element of x after sorting.

    For an even number of elements the upper of the two middle values is
    returned (no averaging). An empty sequence raises IndexError.
    """
    # Floor division keeps the index an integer under both classic and
    # true division semantics.
    return sorted(x)[len(x) // 2]
39 def extract_nthreads(x):
43 def deal_with_posK_res(k):
47 return [e[k] for e in x]
51 def deal_with_posK_res_median(k):
55 return median([e[k] for e in x])
def extract_latency(x):
    """Pick the latency column for a result group.

    Uses position 2 of each entry when the first record's 'persist' field
    equals PERSIST_NONE, and position 3 otherwise, delegating the actual
    extraction to deal_with_posK_res.
    """
    if x[0]['persist'] == PERSIST_NONE:
        position = 2
    else:
        position = 3
    return deal_with_posK_res(position)(x)
63 def deal_with_posK_res_percore(k):
65 nthds = float(extract_nthreads(x))
68 return [e[k]/nthds for e in x]
73 best, bestlen = ls[0], len(ls[0])
74 for i in xrange(1, len(ls)):
75 if len(ls[i]) > bestlen:
76 best, bestline = ls[i], len(ls[i])
80 return tuple(sorted(d.items(), key=lambda x: x[0]))
87 return list(it.chain.from_iterable(ylist))
94 return [(keytodict(x), combine(ys)) for x, ys in d.iteritems()]
96 def mkplot(results, desc, outfilename):
99 double_axis = type(desc['y-axis']) == list
100 assert not double_axis or len(desc['y-axis']) == 2
105 for line_desc in desc['lines']:
106 predfn = line_desc['extractor']
107 line_results = merge([d for d in results if predfn(d)])
108 xpts = map(desc['x-axis'], line_results)
110 ypts = map(desc['y-axis'], line_results)
112 ypts = (map(desc['y-axis'][0], line_results),
113 map(desc['y-axis'][1], line_results))
114 ypts = [(desc['y-axis'][0](x), desc['y-axis'][1](x)) for x in line_results]
115 lines.append({ 'xpts' : xpts, 'ypts' : ypts })
116 longest = longest_line([x['xpts'] for x in lines])
117 for idx in xrange(len(desc['lines'])):
118 line_desc = desc['lines'][idx]
119 if 'extend' in line_desc and line_desc['extend']:
120 assert not double_axis
121 assert len(lines[idx]['xpts']) == 1
122 lines[idx]['xpts'] = longest
123 lines[idx]['ypts'] = [lines[idx]['ypts'][0] for _ in longest]
125 for i in xrange(len(lines)):
127 l = sorted(zip(l['xpts'], l['ypts']), key=lambda x: x[0])
128 lines[i] = { 'xpts' : [x[0] for x in l], 'ypts' : [y[1] for y in l] }
129 if not desc['show-error-bars']:
132 ax.plot(l['xpts'], [median(y) for y in l['ypts']])
134 ax.plot(l['xpts'], [median(y[0]) for y in l['ypts']])
135 ax1.plot(l['xpts'], [median(y[1]) for y in l['ypts']])
139 ymins = np.array([min(y) for y in l['ypts']])
140 ymaxs = np.array([max(y) for y in l['ypts']])
141 ymid = np.array([median(y) for y in l['ypts']])
142 yerr = np.array([ymid - ymins, ymaxs - ymid])
143 ax.errorbar(l['xpts'], ymid, yerr=yerr)
145 ymins = np.array([min(y[0]) for y in l['ypts']])
146 ymaxs = np.array([max(y[0]) for y in l['ypts']])
147 ymid = np.array([median(y[0]) for y in l['ypts']])
148 yerr = np.array([ymid - ymins, ymaxs - ymid])
149 ax.errorbar(l['xpts'], ymid, yerr=yerr)
151 ymins = np.array([min(y[1]) for y in l['ypts']])
152 ymaxs = np.array([max(y[1]) for y in l['ypts']])
153 ymid = np.array([median(y[1]) for y in l['ypts']])
154 yerr = np.array([ymid - ymins, ymaxs - ymid])
155 ax1.errorbar(l['xpts'], ymid, yerr=yerr)
157 ax.set_xlabel(desc['x-label'])
158 ax.set_ylabel(desc['y-label'] if not double_axis else desc['y-label'][0])
159 ax.set_xlim(xmin = min(longest), xmax = max(longest))
160 ax.set_ylim(ymin = 0)
161 ax.legend([l['label'] for l in desc['lines']], loc=desc['legend'])
162 if 'y-axis-major-formatter' in desc:
163 ax.yaxis.set_major_formatter(
164 desc['y-axis-major-formatter'] if not double_axis \
165 else desc['y-axis-major-formatter'][0])
168 _, axmax = ax.get_ylim()
169 ax1.set_ylabel(desc['y-label'][1])
170 ax1.set_ylim(ymin = 0, ymax = axmax)
171 if 'y-axis-major-formatter' in desc:
172 ax1.yaxis.set_major_formatter(
173 desc['y-axis-major-formatter'][1])
176 ax.set_title(desc['title'])
177 if 'subplots-adjust' in desc:
178 fig.subplots_adjust(**desc['subplots-adjust'])
179 fig.savefig(outfilename, format='pdf')
181 def mkbar(results, desc, outfilename):
183 ax = plt.subplot(111)
185 for bar_desc in desc['bars']:
186 predfn = bar_desc['extractor']
187 bar_results = merge([d for d in results if predfn(d)])
188 if len(bar_results) != 1:
189 print "bar_results:", bar_results
190 assert len(bar_results) == 1, 'bad predicate'
191 bars.append({ 'ypts' : desc['y-axis'](bar_results[0]) })
193 inds = np.arange(len(bars)) * width
194 if not desc['show-error-bars']:
195 ax.bar(inds, [median(y['ypts']) for y in bars], width)
201 yerr = [ymid - ymin, ymax - ymid]
203 yerrs = [[geterr(y['ypts'])[0] for y in bars],
204 [geterr(y['ypts'])[1] for y in bars]]
205 ax.bar(inds, [median(y['ypts']) for y in bars], width, yerr=yerrs)
206 ax.set_xticks(inds + width/2.)
207 ax.set_xticklabels( [l['label'] for l in desc['bars']], rotation='vertical' )
208 ax.set_ylabel(desc['y-label'])
209 ax.set_ylim(ymin = 0)
210 if 'y-axis-major-formatter' in desc:
211 ax.yaxis.set_major_formatter(desc['y-axis-major-formatter'])
213 ax.set_title(desc['title'])
214 SI = fig.get_size_inches()
215 if 'subplots-adjust' in desc:
216 fig.subplots_adjust(**desc['subplots-adjust'])
217 fig.set_size_inches((SI[0]/2., SI[1]))
218 fig.savefig(outfilename, format='pdf')
220 def MFormatter(x, p):
223 v = float(x)/float(10**6)
224 if math.ceil(v) == v:
228 def KFormatter(x, p):
231 v = float(x)/float(10**3)
232 if math.ceil(v) == v:
236 TPCC_REGULAR_MIX=[45, 43, 4, 4, 4]
237 TPCC_REALISTIC_MIX=[39, 37, 4, 10, 10]
238 if __name__ == '__main__':
239 matplotlib.rcParams.update({'figure.autolayout' : True})
241 def tpcc_fast_id_extractor(enabled):
243 return lambda x: x[0]['bench_opts'].find('--new-order-fast-id-gen') != -1
245 return lambda x: x[0]['bench_opts'].find('--new-order-fast-id-gen') == -1
def db_extractor(db):
    """Build a predicate that is true when group[0]['db'] equals db."""
    def matches(group):
        return group[0]['db'] == db
    return matches
def name_extractor(name):
    """Build a predicate that is true when group[0]['name'] equals name."""
    def matches(group):
        return group[0]['name'] == name
    return matches
def persist_extractor(mode):
    """Build a predicate matching groups whose first record has 'persist' == mode.

    Records without a 'persist' key never match.
    """
    def matches(group):
        head = group[0]
        if 'persist' not in head:
            return False
        return head['persist'] == mode
    return matches
def binary_extractor(binary):
    """Build a predicate that is true when group[0]['binary'] equals binary."""
    def matches(group):
        return group[0]['binary'] == binary
    return matches
259 def snapshots_extractor(enabled):
261 return lambda x: 'disable_snapshots' not in x[0] or not x[0]['disable_snapshots']
263 return lambda x: 'disable_snapshots' in x[0] and x[0]['disable_snapshots']
265 def ro_txns_extractor(enabled):
267 return lambda x: x[0]['bench_opts'].find('--disable-read-only-snapshots') == -1
269 return lambda x: x[0]['bench_opts'].find('--disable-read-only-snapshots') != -1
271 def gc_extractor(enabled):
273 return lambda x: 'disable_gc' not in x[0] or not x[0]['disable_gc']
275 return lambda x: 'disable_gc' in x[0] and x[0]['disable_gc']
def log_compress_extractor(enabled):
    """Build a predicate selecting groups by their 'log_compress' setting.

    A group counts as compressed when its first record has a truthy
    'log_compress' entry; a missing key counts as not compressed. The
    predicate returns True when that state equals `enabled`.

    Previously `enabled` was ignored and the predicate always selected
    compressed runs; passing True keeps that selection, while passing False
    now selects the uncompressed runs.
    """
    wanted = bool(enabled)

    def matches(group):
        head = group[0]
        compressed = bool('log_compress' in head and head['log_compress'])
        return compressed == wanted
    return matches
280 def numa_extractor(enabled):
282 return lambda x: x[0]['numa_memory'] is not None
284 return lambda x: x[0]['numa_memory'] is None
def sep_trees_extractor(enabled):
    """Build a predicate on the '--enable-separate-tree-per-partition' flag.

    The predicate is true when the presence of the flag in
    group[0]['bench_opts'] equals `enabled`.
    """
    flag = '--enable-separate-tree-per-partition'

    def matches(group):
        has_flag = flag in group[0]['bench_opts']
        return has_flag == enabled
    return matches
def workload_mix_extractor(mix):
    """Build a predicate matching groups run with the given workload mix.

    `mix` is rendered as a comma-separated list and looked up as a
    '--workload-mix <list>' substring of group[0]['bench_opts'].
    """
    rendered = ','.join(str(part) for part in mix)
    wanted = '--workload-mix %s' % rendered

    def matches(group):
        return wanted in group[0]['bench_opts']
    return matches
def nthreads_extractor(nthreads):
    """Build a predicate that is true when group[0]['threads'] equals nthreads."""
    def matches(group):
        return group[0]['threads'] == nthreads
    return matches
296 def AND(*extractors):
298 for ex in extractors:
306 for ex in extractors:
314 'file' : 'istc3-9-8-13.py',
315 'outfile' : 'istc3-9-8-13-scale_rmw.pdf',
316 'x-axis' : extract_nthreads,
317 'y-axis' : deal_with_posK_res(0),
320 'label' : 'Key-Value',
321 'extractor' : AND(name_extractor('scale_rmw'), db_extractor('kvdb')),
325 'extractor' : AND(name_extractor('scale_rmw'), db_extractor('ndb-proto2')),
328 'label' : 'Silo+GlobalTID',
329 'extractor' : AND(name_extractor('scale_rmw'), db_extractor('ndb-proto1')),
332 'x-label' : 'nthreads',
333 'y-label' : 'throughput (txns/sec)',
334 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(MFormatter),
335 'x-axis-set-major-locator' : False,
336 'show-error-bars' : True,
337 'legend' : 'upper left',
338 'title' : 'YCSB scale',
341 'file' : 'istc3-9-8-13.py',
342 'outfile' : 'istc3-9-8-13-scale_rmw-percore.pdf',
343 'x-axis' : extract_nthreads,
344 'y-axis' : deal_with_posK_res_percore(0),
347 'label' : 'Key-Value',
348 'extractor' : AND(name_extractor('scale_rmw'), db_extractor('kvdb')),
352 'extractor' : AND(name_extractor('scale_rmw'), db_extractor('ndb-proto2')),
355 'label' : 'Silo+GlobalTID',
356 'extractor' : AND(name_extractor('scale_rmw'), db_extractor('ndb-proto1')),
359 'x-label' : 'nthreads',
360 'y-label' : 'throughput/core (txns/sec/core)',
361 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
362 'x-axis-set-major-locator' : False,
363 'show-error-bars' : True,
364 'legend' : 'lower left',
365 'title' : 'YCSB scale per-core',
368 'file' : 'istc3-9-8-13.py',
369 'outfile' : 'istc3-9-8-13-scale_tpcc.pdf',
370 'x-axis' : extract_nthreads,
371 'y-axis' : deal_with_posK_res(0),
376 name_extractor('scale_tpcc'),
377 persist_extractor('persist-none')),
380 'label' : 'Silo+PersistTemp',
382 name_extractor('scale_tpcc'),
383 persist_extractor('persist-temp')),
386 'label' : 'Silo+Persist',
388 name_extractor('scale_tpcc'),
389 persist_extractor('persist-real')),
392 'x-label' : 'nthreads',
393 'y-label' : 'throughput (txns/sec)',
394 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
395 'x-axis-set-major-locator' : False,
396 'show-error-bars' : True,
397 'legend' : 'upper left',
398 'title' : 'TPC-C scale (standard mix)',
401 'file' : 'istc3-9-8-13.py',
402 'outfile' : 'istc3-9-8-13-scale_tpcc-percore.pdf',
403 'x-axis' : extract_nthreads,
404 'y-axis' : deal_with_posK_res_percore(0),
409 name_extractor('scale_tpcc'),
410 persist_extractor('persist-none')),
413 'label' : 'Silo+PersistTemp',
415 name_extractor('scale_tpcc'),
416 persist_extractor('persist-temp')),
419 'label' : 'Silo+Persist',
421 name_extractor('scale_tpcc'),
422 persist_extractor('persist-real')),
425 'x-label' : 'nthreads',
426 'y-label' : 'throughput/core (txns/sec/core)',
427 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
428 'x-axis-set-major-locator' : False,
429 'show-error-bars' : True,
430 'legend' : 'lower left',
431 'title' : 'TPC-C scale per-core (standard mix)',
434 'file' : 'istc3-9-8-13.py',
435 'outfile' : 'istc3-9-8-13-multipart_pct.pdf',
436 'x-axis' : extract_pct,
437 'y-axis' : deal_with_posK_res(0),
440 'label' : 'Partition-Store',
442 name_extractor('multipart:pct'),
443 db_extractor('kvdb-st')),
446 'label' : 'Maflingo',
448 name_extractor('multipart:pct'),
449 db_extractor('ndb-proto2'),
450 snapshots_extractor(True)),
453 'label' : 'Maflingo+NoSS',
455 name_extractor('multipart:pct'),
456 db_extractor('ndb-proto2'),
457 snapshots_extractor(False),
458 sep_trees_extractor(False)),
461 'label' : 'Partition-Maflingo+NoSS',
463 name_extractor('multipart:pct'),
464 db_extractor('ndb-proto2'),
465 snapshots_extractor(False),
466 sep_trees_extractor(True)),
469 'x-label' : '% cross-partition',
470 'y-label' : 'throughput (txns/sec)',
471 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(MFormatter),
472 'x-axis-set-major-locator' : False,
473 'show-error-bars' : True,
474 'legend' : 'upper right',
475 'title' : 'TPC-C new order multi-partition',
478 'file' : 'istc3-9-8-13.py',
479 'outfile' : 'istc3-9-8-13-multipart_skew.pdf',
480 'x-axis' : extract_nthreads,
481 'y-axis' : deal_with_posK_res(0),
484 'label' : 'Partition-Store',
485 'extractor' : AND(name_extractor('multipart:skew'), db_extractor('kvdb-st')),
491 name_extractor('multipart:skew'),
492 db_extractor('ndb-proto2'),
493 tpcc_fast_id_extractor(False)),
496 'label' : 'Silo+FastIds',
498 name_extractor('multipart:skew'),
499 db_extractor('ndb-proto2'),
500 tpcc_fast_id_extractor(True)),
503 'x-label' : 'nthreads',
504 'y-label' : 'throughput (txns/sec)',
505 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
506 'x-axis-set-major-locator' : False,
507 'show-error-bars' : True,
508 'legend' : 'upper left',
509 'title' : 'TPC-C new order skew',
512 'file' : 'istc3-9-8-13.py',
513 'outfile' : 'istc3-9-8-13-factor-analysis.pdf',
514 'y-axis' : deal_with_posK_res(0),
517 'label' : 'Baseline',
519 name_extractor('factoranalysis'),
520 db_extractor('ndb-proto2'),
521 binary_extractor('../out-factor-gc-nowriteinplace/benchmarks/dbtest'),
522 snapshots_extractor(True),
523 numa_extractor(False)),
526 'label' : '+NumaAllocator',
528 name_extractor('factoranalysis'),
529 db_extractor('ndb-proto2'),
530 binary_extractor('../out-factor-gc-nowriteinplace/benchmarks/dbtest'),
531 snapshots_extractor(True),
532 numa_extractor(True)),
535 'label' : '+Overwrites',
537 name_extractor('factoranalysis'),
538 db_extractor('ndb-proto2'),
539 binary_extractor('../out-factor-gc/benchmarks/dbtest'),
540 snapshots_extractor(True),
541 numa_extractor(True)),
544 'label' : '-Snapshots',
546 name_extractor('factoranalysis'),
547 db_extractor('ndb-proto2'),
548 binary_extractor('../out-factor-gc/benchmarks/dbtest'),
549 snapshots_extractor(False),
551 numa_extractor(True)),
556 name_extractor('factoranalysis'),
557 db_extractor('ndb-proto2'),
558 binary_extractor('../out-factor-gc/benchmarks/dbtest'),
559 snapshots_extractor(False),
561 numa_extractor(True)),
564 'y-label' : 'throughput (txns/sec)',
565 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
566 'x-axis-set-major-locator' : False,
567 'show-error-bars' : True,
568 'subplots-adjust' : {'bottom' : 0.25},
571 'file' : 'istc3-9-8-13.py',
572 'outfile' : 'istc3-9-8-13-persist-factor-analysis.pdf',
573 'y-axis' : deal_with_posK_res(0),
576 'label' : 'NoPersist',
578 name_extractor('scale_tpcc'),
579 nthreads_extractor(28),
580 persist_extractor('persist-none'),
581 db_extractor('ndb-proto2')),
584 'label' : 'ConstRecs',
586 name_extractor('persistfactoranalysis'),
587 binary_extractor('../out-factor-fake-compression/benchmarks/dbtest'),
588 persist_extractor('persist-real'),
589 numa_extractor(True)),
594 name_extractor('scale_tpcc'),
595 nthreads_extractor(28),
596 persist_extractor('persist-real'),
597 db_extractor('ndb-proto2')),
600 'label' : 'Compress',
602 name_extractor('persistfactoranalysis'),
603 persist_extractor('persist-real'),
604 log_compress_extractor(True),
605 numa_extractor(True)),
608 'y-label' : 'throughput (txns/sec)',
609 'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
610 'x-axis-set-major-locator' : False,
611 'show-error-bars' : True,
612 'subplots-adjust' : {'bottom' : 0.2},
615 'file' : 'istc3-9-8-13.py',
616 'outfile' : 'istc3-9-8-13-readonly.pdf',
617 'x-axis' : extract_raw_pct,
618 'y-axis' : [deal_with_posK_res(0), deal_with_posK_res(4)],
621 'label' : '+Snapshots',
623 name_extractor('readonly'),
624 snapshots_extractor(True)),
627 'label' : '-Snapshots',
629 name_extractor('readonly'),
630 snapshots_extractor(False)),
633 'x-label' : '% remote warehouse',
634 'y-label' : ['throughput (txns/sec)', 'aborts/sec'],
635 'y-axis-major-formatter' : [
636 matplotlib.ticker.FuncFormatter(KFormatter),
637 matplotlib.ticker.FuncFormatter(KFormatter)
640 'x-axis-set-major-locator' : False,
641 'show-error-bars' : True,
644 'file' : 'istc3-9-8-13.py',
645 'outfile' : 'istc3-9-8-13-scale_tpcc-latency.pdf',
646 'x-axis' : deal_with_posK_res_median(0),
647 'y-axis' : extract_latency,
652 name_extractor('scale_tpcc'),
653 persist_extractor('persist-none')),
656 'label' : 'Silo+PersistTemp',
658 name_extractor('scale_tpcc'),
659 persist_extractor('persist-temp')),
662 'label' : 'Silo+Persist',
664 name_extractor('scale_tpcc'),
665 persist_extractor('persist-real')),
668 'x-label' : 'throughput (txns/sec)',
669 'y-label' : 'latency (ms)',
670 #'y-axis-major-formatter' : matplotlib.ticker.FuncFormatter(KFormatter),
671 'x-axis-set-major-locator' : False,
672 'show-error-bars' : True,
673 'legend' : 'upper left',
674 'title' : 'TPC-C scale (standard mix)',
678 def extract_from_files(f):
680 return list(it.chain.from_iterable([extract_from_files(ff) for ff in f]))
685 FINAL_OUTPUT_FILENAME='istc3-cameraready.pdf'
686 from PyPDF2 import PdfFileWriter, PdfFileReader
687 output = PdfFileWriter()
688 for config in configs:
689 #for config in [configs[-1]]:
690 res = extract_from_files(config['file'])
691 if 'lines' in config:
692 mkplot(res, config, config['outfile'])
693 elif 'bars' in config:
694 mkbar(res, config, config['outfile'])
696 assert False, "bad config"
697 inp = PdfFileReader(open(config['outfile'], 'rb'))
698 output.addPage(inp.getPage(0))
699 print >>sys.stderr, '[INFO] finished', config['outfile']
701 output.write(file(FINAL_OUTPUT_FILENAME, 'wb'))