|
import json
import locale
import sys
from optparse import OptionParser

from numpy import *
from scipy import stats
| 5 | + |
VALID_GROUP_BYS = ['browser', 'pdf', 'page', 'round', 'stat']
USAGE_EXAMPLE = "%prog BASELINE CURRENT"

class TestOptions(OptionParser):
    """Command-line parser for the baseline/current stats comparison script."""

    def __init__(self, **kwargs):
        OptionParser.__init__(self, **kwargs)
        # --groupBy may be given multiple times; each value is a
        # comma-separated list of columns.
        self.add_option("--groupBy", action="append", dest="groupBy", type="string",
                        help="How the statistics should grouped. Valid options: " + ', '.join(VALID_GROUP_BYS) + '.', default=[])
        self.set_usage(USAGE_EXAMPLE)

    def verifyOptions(self, options, args):
        """Validate args and normalize options.groupBy into lists of columns.

        Exits via self.error() on invalid input; otherwise returns options.
        """
        if len(args) < 2:
            self.error('There must be two comparison files arguments.')
        # Fall back to the default groupings when none were requested.
        if not options.groupBy:
            options.groupBy = ['browser,stat', 'browser,pdf,stat']
        parsed = []
        for spec in options.groupBy:
            columns = spec.split(',')
            for column in columns:
                if column not in VALID_GROUP_BYS:
                    self.error('Invalid group by option of "' + column + '"')
            parsed.append(columns)
        options.groupBy = parsed
        return options
| 32 | + |
| 33 | +## {{{ http://code.activestate.com/recipes/267662/ (r7) |
| 34 | +import cStringIO,operator |
| 35 | + |
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
           separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
    """Indents a table by column.
    - rows: A sequence of sequences of items, one sequence per row.
    - hasHeader: True if the first row consists of the columns' names.
    - headerChar: Character to be used for the row separator line
      (if hasHeader==True or separateRows==True).
    - delim: The column delimiter.
    - justify: Determines how are data justified in their column.
      Valid values are 'left','right' and 'center'.
    - separateRows: True if rows are to be separated by a line
      of 'headerChar's.
    - prefix: A string prepended to each printed row.
    - postfix: A string appended to each printed row.
    - wrapfunc: A function f(text) for wrapping text; each element in
      the table is first wrapped by this function.

    NOTE(review): Python 2 only — relies on map(None, ...), the
    `print >>` statement, cStringIO and the builtin reduce.
    """
    # closure for breaking logical rows to physical, using wrapfunc
    def rowWrapper(row):
        # Wrap each cell, then split on newlines: one substring list per cell.
        newRows = [wrapfunc(str(item)).split('\n') for item in row]
        # map(None, *seqs) is the Python-2 transpose-with-None-padding idiom
        # (like itertools.zip_longest): it turns per-cell substring lists into
        # physical rows; `substr or ''` converts the None padding to ''.
        return [[substr or '' for substr in item] for item in map(None,*newRows)]
    # break each logical row into one or more physical ones
    logicalRows = [rowWrapper(row) for row in rows]
    # columns of physical rows (another map(None, ...) transpose over the
    # concatenation of all physical rows)
    columns = map(None,*reduce(operator.add,logicalRows))
    # get the maximum of each column by the string length of its items
    maxWidths = [max([len(str(item)) for item in column]) for column in columns]
    # Separator must span all columns plus every delimiter and the affixes.
    rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                 len(delim)*(len(maxWidths)-1))
    # select the appropriate justify method
    justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
    output=cStringIO.StringIO()
    if separateRows: print >> output, rowSeparator
    for physicalRows in logicalRows:
        for row in physicalRows:
            print >> output, \
                prefix \
                + delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
                + postfix
        # hasHeader is reset so the header separator is emitted only once.
        if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
    return output.getvalue()
| 76 | + |
| 77 | +# written by Mike Brown |
| 78 | +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061 |
def wrap_onspace(text, width):
    """
    Word-wrap *text* at *width* columns, breaking only at spaces and
    preserving any newlines already present in the text (posix '\n').
    """
    words = text.split(' ')
    wrapped = words[0]
    for word in words[1:]:
        # Length of the portion of the output after the last newline.
        lastLineLen = len(wrapped) - (wrapped.rfind('\n') + 1)
        # Only the word's first physical line counts toward the width test.
        firstChunkLen = len(word.split('\n', 1)[0])
        if lastLineLen + firstChunkLen >= width:
            wrapped += '\n' + word
        else:
            wrapped += ' ' + word
    return wrapped
| 93 | + |
| 94 | +import re |
def wrap_onspace_strict(text, width):
    """Like wrap_onspace, but enforces the width constraint by hard-splitting
    any run of non-whitespace longer than *width* before wrapping."""
    longWord = re.compile(r'\S{'+str(width)+r',}')
    presplit = longWord.sub(lambda m: wrap_always(m.group(), width), text)
    return wrap_onspace(presplit, width)
| 100 | + |
| 101 | +import math |
def wrap_always(text, width):
    """Hard-wrap *text* every *width* characters, ignoring word boundaries.

    Returns the chunks joined by newlines; an empty string yields ''.
    """
    # Integer stepping avoids the float round-off risk of the original
    # math.ceil(1.*len(text)/width) for very long inputs, and replaces the
    # Python-2-only xrange so the helper runs on both Python 2 and 3.
    return '\n'.join(text[i:i + width] for i in range(0, len(text), width))
| 107 | + |
def formatTime(time):
    """Format a millisecond value as a whole number, with locale-aware
    thousands grouping."""
    # locale.format() was deprecated and removed in Python 3.12;
    # locale.format_string() exists since Python 2.7 with the same behavior.
    return locale.format_string("%.*f", (0, time), True)
| 110 | + |
| 111 | +# Group the stats by keys. We should really just stick these in a SQL database |
| 112 | +# so we aren't reiventing the wheel. |
def group(stats, groupBy):
    """Bucket the flattened stat rows by the columns in *groupBy*.

    Returns a dict mapping a tuple of the groupBy column values to the
    list of 'time' samples that fell in that bucket.
    """
    grouped = {}
    for entry in stats:
        key = tuple(entry[column] for column in groupBy)
        grouped.setdefault(key, []).append(entry['time'])
    return grouped
| 124 | + |
| 125 | + |
def mean(l):
    """Return the arithmetic mean of the numeric sequence *l* (numpy scalar)."""
    samples = array(l)
    return samples.mean()
| 128 | + |
| 129 | + |
| 130 | +# Take the somewhat normalized stats file and flatten it so there is a row for |
| 131 | +# every recorded stat. |
def flatten(stats):
    """Flatten the per-run stats records into one row per recorded stat,
    computing each stat's elapsed 'time' as end - start."""
    return [{
        'browser': record['browser'],
        'page': record['page'],
        'pdf': record['pdf'],
        'round': record['round'],
        'stat': measurement['name'],
        'time': int(measurement['end']) - int(measurement['start']),
    } for record in stats for measurement in record['stats']]
| 145 | + |
| 146 | +# Dump various stats in a table to compare the baseline and current results. |
| 147 | +# T-test Refresher: |
| 148 | +# If I understand t-test correctly, p is the probability that we'll observe |
| 149 | +# another test that is as extreme as the current result assuming the null |
| 150 | +# hypothesis is true. P is NOT the probability of the null hypothesis. |
| 151 | +# The null hypothesis in this case is that the baseline and current results will |
| 152 | +# be the same. It is generally accepted that you can reject the null hypothesis |
| 153 | +# if the p-value is less than 0.05. So if p < 0.05 we can reject the results |
| 154 | +# are the same which doesn't necessarily mean the results are faster/slower but |
| 155 | +# it can be implied. |
def stat(baseline, current, groupBy):
    """Print a table comparing baseline vs. current times, grouped by the
    columns in *groupBy*.

    For each bucket a Welch's t-test is run; a bucket is labelled
    faster/slower only when p < 0.05 (i.e. the null hypothesis that the
    two runs are the same can be rejected).
    """
    labels = groupBy + ['Baseline(ms)', 'Current(ms)', '+/-', '%', 'Result(P<.05)']
    baselineGroup = group(baseline, groupBy)
    currentGroup = group(current, groupBy)
    rows = []
    for key in baselineGroup:
        # Guard against keys present only in the baseline run; the original
        # raised KeyError here when the runs covered different pdfs/pages.
        if key not in currentGroup:
            continue
        t, p = stats.ttest_ind(baselineGroup[key], currentGroup[key], equal_var = False)
        # Distinct names so we don't clobber the baseline/current parameters
        # (the original reassigned them inside the loop).
        baselineMean = mean(baselineGroup[key])
        currentMean = mean(currentGroup[key])
        speed = ''
        if p < 0.05:
            speed = 'faster' if currentMean < baselineMean else 'slower'
        row = list(key)
        row += [
            formatTime(baselineMean),
            formatTime(currentMean),
            formatTime(baselineMean - currentMean),
            round(100 * (1.0 * baselineMean - currentMean) / baselineMean, 2),
            speed
        ]
        rows.append(row)
    # Sort rows by their groupBy key columns for a stable, readable table.
    rows.sort(key=lambda row: tuple(row[0:len(groupBy)]))
    # Single-argument print() is valid on both Python 2 and 3.
    print(indent([labels] + rows, hasHeader=True))
| 179 | + |
def main():
    """Parse the command line, load the two stats JSON files, and print a
    comparison table for each requested grouping."""
    optionParser = TestOptions()
    options, args = optionParser.parse_args()
    options = optionParser.verifyOptions(options, args)
    if options is None:
        # Defensive: verifyOptions normally exits via error() rather than
        # returning None. Requires `import sys` at the top of the file —
        # the original referenced sys without importing it (NameError).
        sys.exit(1)

    with open(args[0]) as baselineFile:
        baseline = flatten(json.load(baselineFile))
    with open(args[1]) as currentFile:
        current = flatten(json.load(currentFile))

    for groupBy in options.groupBy:
        # Single-argument print() is valid on both Python 2 and 3.
        print("-- Grouped By " + ', '.join(groupBy) + ' -- ')
        stat(baseline, current, groupBy)
| 195 | + |
| 196 | + |
# Script entry point: compare two stats JSON files given on the command line.
if __name__ == '__main__':
    main()
0 commit comments