Skip to content

Commit 9583cb7

Browse files
committed
Merge pull request mozilla#2449 from brendandahl/statsrecord
Start of the stat utilities.
2 parents e740533 + 9e50706 commit 9583cb7

File tree

4 files changed

+244
-4
lines changed

4 files changed

+244
-4
lines changed

test/driver.js

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
// https://github.com/mozilla/pdf.js/pull/764#issuecomment-2638944
2626
// "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1."
2727
// PDFJS.disableWorker = true;
28+
PDFJS.enableStats = true;
2829

2930
var appPath, browser, canvas, dummyCanvas, currentTaskIdx, manifest, stdout;
3031
var inFlightRequests = 0;
@@ -45,6 +46,7 @@ function load() {
4546
browser = params.browser;
4647
var manifestFile = params.manifestFile;
4748
appPath = params.path;
49+
var delay = params.delay || 0;
4850

4951
canvas = document.createElement('canvas');
5052
canvas.mozOpaque = true;
@@ -67,7 +69,14 @@ function load() {
6769
nextTask();
6870
}
6971
};
70-
r.send(null);
72+
if (delay) {
73+
log('\nDelaying for ' + delay + 'ms...\n');
74+
}
75+
// When gathering the stats the numbers seem to be more reliable if the
76+
// browser is given more time to start up.
77+
setTimeout(function() {
78+
r.send(null);
79+
}, delay);
7180
}
7281

7382
function cleanup() {
@@ -110,6 +119,7 @@ function nextTask() {
110119
}
111120
var task = manifest[currentTaskIdx];
112121
task.round = 0;
122+
task.stats = {times: []};
113123

114124
log('Loading file "' + task.file + '"\n');
115125

@@ -265,6 +275,8 @@ function nextPage(task, loadError) {
265275
};
266276
var completeRender = (function(error) {
267277
page.destroy();
278+
task.stats = page.stats;
279+
page.stats = new StatTimer();
268280
snapshotCurrentPage(task, error);
269281
});
270282
page.render(renderContext).then(function() {
@@ -340,7 +352,8 @@ function sendTaskResult(snapshot, task, failure, result) {
340352
file: task.file,
341353
round: task.round,
342354
page: task.pageNum,
343-
snapshot: snapshot
355+
snapshot: snapshot,
356+
stats: task.stats.times
344357
});
345358
}
346359

test/stats/results/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore
5+

test/stats/statcmp.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
from numpy import *
2+
from scipy import stats
3+
import json, locale
4+
from optparse import OptionParser
5+
6+
VALID_GROUP_BYS = ['browser', 'pdf', 'page', 'round', 'stat']
USAGE_EXAMPLE = "%prog BASELINE CURRENT"

class TestOptions(OptionParser):
    """Option parser for comparing a baseline stats file against a current one."""

    def __init__(self, **kwargs):
        OptionParser.__init__(self, **kwargs)
        self.add_option("--groupBy", action="append", dest="groupBy", type="string",
                        help="How the statistics should grouped. Valid options: " + ', '.join(VALID_GROUP_BYS) + '.', default=[])
        self.set_usage(USAGE_EXAMPLE)

    def verifyOptions(self, options, args):
        """Check the positional arguments and normalize options.groupBy.

        Each --groupBy value is a comma-separated list of column names;
        after this call options.groupBy is a list of lists of columns.
        Invalid columns or a missing file argument abort via self.error().
        """
        if len(args) < 2:
            self.error('There must be two comparison files arguments.')
        # Verify and expand the group-by specifications.
        if not options.groupBy:
            # Sensible defaults when the user supplied no --groupBy at all.
            options.groupBy = ['browser,stat', 'browser,pdf,stat']
        parsedGroups = []
        for spec in options.groupBy:
            columns = spec.split(',')
            for column in columns:
                if column not in VALID_GROUP_BYS:
                    self.error('Invalid group by option of "' + column + '"')
            parsedGroups.append(columns)
        options.groupBy = parsedGroups

        return options
32+
33+
## {{{ http://code.activestate.com/recipes/267662/ (r7)
34+
import cStringIO,operator
35+
36+
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
           separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
    """Indents a table by column.
    - rows: A sequence of sequences of items, one sequence per row.
    - hasHeader: True if the first row consists of the columns' names.
    - headerChar: Character to be used for the row separator line
    (if hasHeader==True or separateRows==True).
    - delim: The column delimiter.
    - justify: Determines how are data justified in their column.
    Valid values are 'left','right' and 'center'.
    - separateRows: True if rows are to be separated by a line
    of 'headerChar's.
    - prefix: A string prepended to each printed row.
    - postfix: A string appended to each printed row.
    - wrapfunc: A function f(text) for wrapping text; each element in
    the table is first wrapped by this function."""
    # closure for breaking logical rows to physical, using wrapfunc
    def rowWrapper(row):
        newRows = [wrapfunc(str(item)).split('\n') for item in row]
        # NOTE: map(None, *iterables) is the Python 2 zip_longest idiom:
        # it pads the shorter wrapped cells with None, which `substr or ''`
        # below converts to empty strings so every physical row is rectangular.
        return [[substr or '' for substr in item] for item in map(None,*newRows)]
    # break each logical row into one or more physical ones
    logicalRows = [rowWrapper(row) for row in rows]
    # columns of physical rows (again the map(None, ...) transpose idiom)
    columns = map(None,*reduce(operator.add,logicalRows))
    # get the maximum of each column by the string length of its items
    maxWidths = [max([len(str(item)) for item in column]) for column in columns]
    rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                 len(delim)*(len(maxWidths)-1))
    # select the appropriate justify method (rebinds the `justify` parameter
    # from a string to the matching str method)
    justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
    output=cStringIO.StringIO()
    if separateRows: print >> output, rowSeparator
    for physicalRows in logicalRows:
        for row in physicalRows:
            print >> output, \
                prefix \
                + delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
                + postfix
        # hasHeader is cleared after the first logical row so the separator
        # is only emitted once when separateRows is False
        if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
    return output.getvalue()
76+
77+
# written by Mike Brown
78+
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
79+
def wrap_onspace(text, width):
    """
    A word-wrap function that preserves existing line breaks
    and most spaces in the text. Expects that existing line
    breaks are posix newlines (\n).
    """
    wrapped = None
    for word in text.split(' '):
        if wrapped is None:
            # First word seeds the accumulator unchanged.
            wrapped = word
            continue
        # Length of the line currently being built (after the last newline).
        currentLen = len(wrapped) - (wrapped.rfind('\n') + 1)
        # Only the word's first physical line counts toward the width check.
        firstSegmentLen = len(word.split('\n', 1)[0])
        separator = '\n' if currentLen + firstSegmentLen >= width else ' '
        wrapped += separator + word
    return wrapped
93+
94+
import re
95+
def wrap_onspace_strict(text, width):
    """Similar to wrap_onspace, but enforces the width constraint:
    words longer than width are split."""
    # Pre-split any run of non-whitespace at least `width` long, then let
    # wrap_onspace handle the (now well-behaved) text.
    longWord = re.compile(r'\S{' + str(width) + r',}')
    presplit = longWord.sub(lambda match: wrap_always(match.group(), width), text)
    return wrap_onspace(presplit, width)
100+
101+
import math
102+
def wrap_always(text, width):
103+
"""A simple word-wrap function that wraps text on exactly width characters.
104+
It doesn't split the text in words."""
105+
return '\n'.join([ text[width*i:width*(i+1)] \
106+
for i in xrange(int(math.ceil(1.*len(text)/width))) ])
107+
108+
def formatTime(time):
    """Format a millisecond value as a locale-aware integer string (no decimals)."""
    # locale.format() officially supports only a single specifier with a
    # scalar value (and was removed in Python 3.12); locale.format_string()
    # is the documented API for "%.*f" with the (precision, value) tuple.
    return locale.format_string("%.*f", (0, time), True)
110+
111+
# Group the stats by keys. We should really just stick these in a SQL database
112+
# so we aren't reinventing the wheel.
113+
def group(stats, groupBy):
    """Bucket the flattened stat rows by the columns named in groupBy.

    Returns a dict mapping a tuple of the groupBy column values to the
    list of 'time' values that fell into that bucket.
    """
    grouped = {}
    for entry in stats:
        key = tuple(entry[column] for column in groupBy)
        grouped.setdefault(key, []).append(entry['time'])
    return grouped
124+
125+
126+
def mean(l):
    """Arithmetic mean of the numbers in l, computed via numpy."""
    values = array(l)
    return values.mean()
128+
129+
130+
# Take the somewhat normalized stats file and flatten it so there is a row for
131+
# every recorded stat.
132+
def flatten(stats):
    """Expand the nested stats records into one flat row per recorded stat.

    Each input record carries browser/page/pdf/round identifiers plus a
    'stats' list of {name, start, end} timings; each timing becomes its own
    row with a computed integer 'time' delta.
    """
    rows = []
    for record in stats:
        identifiers = {
            'browser': record['browser'],
            'page': record['page'],
            'pdf': record['pdf'],
            'round': record['round'],
        }
        for timing in record['stats']:
            row = dict(identifiers)
            row['stat'] = timing['name']
            row['time'] = int(timing['end']) - int(timing['start'])
            rows.append(row)
    return rows
145+
146+
# Dump various stats in a table to compare the baseline and current results.
147+
# T-test Refresher:
148+
# If I understand t-test correctly, p is the probability that we'll observe
149+
# another test that is as extreme as the current result assuming the null
150+
# hypothesis is true. P is NOT the probability of the null hypothesis.
151+
# The null hypothesis in this case is that the baseline and current results will
152+
# be the same. It is generally accepted that you can reject the null hypothesis
153+
# if the p-value is less than 0.05. So if p < 0.05 we can reject the results
154+
# are the same which doesn't necessarily mean the results are faster/slower but
155+
# it can be implied.
156+
def stat(baseline, current, groupBy):
    """Print a table comparing baseline vs. current timings, grouped by groupBy.

    baseline/current: flattened stat rows (see flatten()).
    groupBy: list of column names to aggregate on.
    A Welch t-test (unequal variances) decides significance; the row is only
    labelled faster/slower when p < 0.05.
    """
    labels = groupBy + ['Baseline(ms)', 'Current(ms)', '+/-', '%', 'Result(P<.05)']
    baselineGroup = group(baseline, groupBy)
    currentGroup = group(current, groupBy)
    rows = []
    for key in baselineGroup:
        # A key present only in the baseline run has nothing to compare
        # against; skip it instead of raising a KeyError.
        if key not in currentGroup:
            continue
        t, p = stats.ttest_ind(baselineGroup[key], currentGroup[key], equal_var = False)
        # Renamed from baseline/current: the originals shadowed the
        # function parameters inside the loop.
        baselineMean = mean(baselineGroup[key])
        currentMean = mean(currentGroup[key])
        speed = ''
        if p < 0.05:
            speed = 'faster' if currentMean < baselineMean else 'slower'
        row = list(key)
        row += [
            formatTime(baselineMean),
            formatTime(currentMean),
            formatTime(baselineMean - currentMean),
            round(100 * (1.0 * baselineMean - currentMean) / baselineMean, 2),
            speed
        ]
        rows.append(row)
    rows.sort(key=lambda row: tuple(row[0:len(groupBy)]))
    # print(x) with a single argument is identical in Python 2 and 3.
    print(indent([labels] + rows, hasHeader=True))
179+
180+
def main():
    """Parse arguments, load both stats files, and print the comparisons."""
    # `sys` is not imported at module level anywhere in this file, so the
    # exit path below would have raised a NameError; import it here.
    import sys

    optionParser = TestOptions()
    options, args = optionParser.parse_args()
    options = optionParser.verifyOptions(options, args)
    if options is None:
        sys.exit(1)

    with open(args[0]) as baselineFile:
        baseline = flatten(json.load(baselineFile))
    with open(args[1]) as currentFile:
        current = flatten(json.load(currentFile))

    for groupBy in options.groupBy:
        print("-- Grouped By " + ', '.join(groupBy) + ' -- ')
        stat(baseline, current, groupBy)


if __name__ == '__main__':
    main()

test/test.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ def __init__(self, **kwargs):
6262
help="Skips test PDFs downloading.", default=False)
6363
self.add_option("--ignoreDownloadErrors", action="store_true", dest="ignoreDownloadErrors",
6464
help="Ignores errors during test PDFs downloading.", default=False)
65+
self.add_option("--statsFile", action="store", dest="statsFile", type="string",
66+
help="The file where to store stats.", default=None)
67+
self.add_option("--statsDelay", action="store", dest="statsDelay", type="int",
68+
help="The amount of time in milliseconds the browser should wait before starting stats.", default=10000)
6569
self.set_usage(USAGE_EXAMPLE)
6670

6771
def verifyOptions(self, options):
@@ -75,6 +79,8 @@ def verifyOptions(self, options):
7579
print "Warning: ignoring browser argument since manifest file was also supplied"
7680
if not options.browser and not options.browserManifestFile:
7781
print "Starting server on port %s." % options.port
82+
if not options.statsFile:
83+
options.statsDelay = 0
7884

7985
return options
8086

@@ -111,6 +117,8 @@ class State:
111117
numFBFFailures = 0
112118
numLoadFailures = 0
113119
eqLog = None
120+
saveStats = False
121+
stats = [ ]
114122
lastPost = { }
115123

116124
class UnitTestState:
@@ -323,6 +331,15 @@ def do_POST(self):
323331
id, failure, round, page, snapshot = result['id'], result['failure'], result['round'], result['page'], result['snapshot']
324332
taskResults = State.taskResults[browser][id]
325333
taskResults[round].append(Result(snapshot, failure, page))
334+
if State.saveStats:
335+
stat = {
336+
'browser': browser,
337+
'pdf': id,
338+
'page': page,
339+
'round': round,
340+
'stats': result['stats']
341+
}
342+
State.stats.append(stat)
326343

327344
def isTaskDone():
328345
numPages = result["numPages"]
@@ -552,6 +569,8 @@ def setUp(options):
552569
taskResults.append([ ])
553570
State.taskResults[b.name][id] = taskResults
554571

572+
if options.statsFile != None:
573+
State.saveStats = True
555574
return testBrowsers
556575

557576
def setUpUnitTests(options):
@@ -572,6 +591,7 @@ def startBrowsers(browsers, options, path):
572591
host = 'http://%s:%s' % (SERVER_HOST, options.port)
573592
qs = '?browser='+ urllib.quote(b.name) +'&manifestFile='+ urllib.quote(options.manifestFile)
574593
qs += '&path=' + b.path
594+
qs += '&delay=' + str(options.statsDelay)
575595
b.start(host + path + qs)
576596

577597
def teardownBrowsers(browsers):
@@ -689,7 +709,7 @@ def checkLoad(task, results, browser):
689709
print 'TEST-PASS | load test', task['id'], '| in', browser
690710

691711

692-
def processResults():
712+
def processResults(options):
693713
print ''
694714
numFatalFailures = (State.numErrors + State.numFBFFailures)
695715
if 0 == State.numEqFailures and 0 == numFatalFailures:
@@ -702,6 +722,10 @@ def processResults():
702722
print ' different ref/snapshot:', State.numEqFailures
703723
if 0 < State.numFBFFailures:
704724
print ' different first/second rendering:', State.numFBFFailures
725+
if options.statsFile != None:
726+
with open(options.statsFile, 'w') as sf:
727+
sf.write(json.dumps(State.stats, sort_keys=True, indent=4))
728+
print 'Wrote stats file: ' + options.statsFile
705729

706730

707731
def maybeUpdateRefImages(options, browser):
@@ -752,7 +776,7 @@ def runTests(options, browsers):
752776
State.remaining[b] = 0
753777
checkIfDone()
754778
time.sleep(1)
755-
processResults()
779+
processResults(options)
756780
finally:
757781
teardownBrowsers(browsers)
758782
t2 = time.time()

0 commit comments

Comments
 (0)