|
import json
import locale
import sys
from optparse import OptionParser

from numpy import *
from scipy import stats
| 5 | + |
VALID_GROUP_BYS = ['browser', 'pdf', 'page', 'round', 'stat']
USAGE_EXAMPLE = "%prog BASELINE CURRENT"

class TestOptions(OptionParser):
    """Command-line parser for the baseline/current stats comparison script."""

    def __init__(self, **kwargs):
        OptionParser.__init__(self, **kwargs)
        # --groupBy may be given multiple times; each value is a
        # comma-separated list of columns.
        self.add_option("--groupBy", action="append", dest="groupBy", type="string",
                        help="How the statistics should grouped. Valid options: " + ', '.join(VALID_GROUP_BYS) + '.', default=[])
        self.set_usage(USAGE_EXAMPLE)

    def verifyOptions(self, options, args):
        """Validate args and normalize options.groupBy into lists of columns.

        Exits via self.error() on invalid input; otherwise returns options.
        """
        if len(args) < 2:
            self.error('There must be two comparison files arguments.')
        # Fall back to the default groupings when none were requested.
        if not options.groupBy:
            options.groupBy = ['browser,stat', 'browser,pdf,stat']
        parsed = []
        for spec in options.groupBy:
            columns = spec.split(',')
            for column in columns:
                if column not in VALID_GROUP_BYS:
                    self.error('Invalid group by option of "' + column + '"')
            parsed.append(columns)
        options.groupBy = parsed
        return options
| 32 | + |
| 33 | +## {{{ http://code.activestate.com/recipes/267662/ (r7) |
| 34 | +import cStringIO,operator |
| 35 | + |
def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left',
           separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x):
    """Indents a table by column.
    - rows: A sequence of sequences of items, one sequence per row.
    - hasHeader: True if the first row consists of the columns' names.
    - headerChar: Character to be used for the row separator line
      (if hasHeader==True or separateRows==True).
    - delim: The column delimiter.
    - justify: Determines how are data justified in their column.
      Valid values are 'left','right' and 'center'.
    - separateRows: True if rows are to be separated by a line
      of 'headerChar's.
    - prefix: A string prepended to each printed row.
    - postfix: A string appended to each printed row.
    - wrapfunc: A function f(text) for wrapping text; each element in
      the table is first wrapped by this function.

    NOTE(review): Python 2 only — relies on map(None, ...), the
    `print >>` statement, cStringIO and the builtin reduce.
    """
    # closure for breaking logical rows to physical, using wrapfunc
    def rowWrapper(row):
        # Wrap each cell, then split on newlines: one substring list per cell.
        newRows = [wrapfunc(str(item)).split('\n') for item in row]
        # map(None, *seqs) is the Python-2 transpose-with-None-padding idiom
        # (like itertools.zip_longest): it turns per-cell substring lists into
        # physical rows; `substr or ''` converts the None padding to ''.
        return [[substr or '' for substr in item] for item in map(None,*newRows)]
    # break each logical row into one or more physical ones
    logicalRows = [rowWrapper(row) for row in rows]
    # columns of physical rows (another map(None, ...) transpose over the
    # concatenation of all physical rows)
    columns = map(None,*reduce(operator.add,logicalRows))
    # get the maximum of each column by the string length of its items
    maxWidths = [max([len(str(item)) for item in column]) for column in columns]
    # Separator must span all columns plus every delimiter and the affixes.
    rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                 len(delim)*(len(maxWidths)-1))
    # select the appropriate justify method
    justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()]
    output=cStringIO.StringIO()
    if separateRows: print >> output, rowSeparator
    for physicalRows in logicalRows:
        for row in physicalRows:
            print >> output, \
                prefix \
                + delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \
                + postfix
        # hasHeader is reset so the header separator is emitted only once.
        if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False
    return output.getvalue()
| 76 | + |
| 77 | +# written by Mike Brown |
| 78 | +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061 |
def wrap_onspace(text, width):
    """
    Word-wrap *text* at *width* columns, breaking only at spaces and
    preserving any newlines already present in the text (posix '\n').
    """
    words = text.split(' ')
    wrapped = words[0]
    for word in words[1:]:
        # Length of the portion of the output after the last newline.
        lastLineLen = len(wrapped) - (wrapped.rfind('\n') + 1)
        # Only the word's first physical line counts toward the width test.
        firstChunkLen = len(word.split('\n', 1)[0])
        if lastLineLen + firstChunkLen >= width:
            wrapped += '\n' + word
        else:
            wrapped += ' ' + word
    return wrapped
| 93 | + |
| 94 | +import re |
def wrap_onspace_strict(text, width):
    """Like wrap_onspace, but enforces the width constraint by hard-splitting
    any run of non-whitespace longer than *width* before wrapping."""
    longWord = re.compile(r'\S{'+str(width)+r',}')
    presplit = longWord.sub(lambda m: wrap_always(m.group(), width), text)
    return wrap_onspace(presplit, width)
| 100 | + |
| 101 | +import math |
def wrap_always(text, width):
    """Hard-wrap *text* every *width* characters, ignoring word boundaries.

    Returns the chunks joined by newlines; an empty string yields ''.
    """
    # Integer stepping avoids the float round-off risk of the original
    # math.ceil(1.*len(text)/width) for very long inputs, and replaces the
    # Python-2-only xrange so the helper runs on both Python 2 and 3.
    return '\n'.join(text[i:i + width] for i in range(0, len(text), width))
| 107 | + |
def formatTime(time):
    """Format a millisecond value as a whole number, with locale-aware
    thousands grouping."""
    # locale.format() was deprecated and removed in Python 3.12;
    # locale.format_string() exists since Python 2.7 with the same behavior.
    return locale.format_string("%.*f", (0, time), True)
| 110 | + |
| 111 | +# Group the stats by keys. We should really just stick these in a SQL database |
| 112 | +# so we aren't reiventing the wheel. |
def group(stats, groupBy):
    """Bucket the flattened stat rows by the columns in *groupBy*.

    Returns a dict mapping a tuple of the groupBy column values to the
    list of 'time' samples that fell in that bucket.
    """
    grouped = {}
    for entry in stats:
        key = tuple(entry[column] for column in groupBy)
        grouped.setdefault(key, []).append(entry['time'])
    return grouped
| 124 | + |
| 125 | + |
def mean(l):
    """Return the arithmetic mean of the numeric sequence *l* (numpy scalar)."""
    samples = array(l)
    return samples.mean()
| 128 | + |
| 129 | + |
| 130 | +# Take the somewhat normalized stats file and flatten it so there is a row for |
| 131 | +# every recorded stat. |
def flatten(stats):
    """Flatten the per-run stats records into one row per recorded stat,
    computing each stat's elapsed 'time' as end - start."""
    return [{
        'browser': record['browser'],
        'page': record['page'],
        'pdf': record['pdf'],
        'round': record['round'],
        'stat': measurement['name'],
        'time': int(measurement['end']) - int(measurement['start']),
    } for record in stats for measurement in record['stats']]
| 145 | + |
| 146 | +# Dump various stats in a table to compare the baseline and current results. |
| 147 | +# T-test Refresher: |
| 148 | +# If I understand t-test correctly, p is the probability that we'll observe |
| 149 | +# another test that is as extreme as the current result assuming the null |
| 150 | +# hypothesis is true. P is NOT the probability of the null hypothesis. |
| 151 | +# The null hypothesis in this case is that the baseline and current results will |
| 152 | +# be the same. It is generally accepted that you can reject the null hypothesis |
| 153 | +# if the p-value is less than 0.05. So if p < 0.05 we can reject the results |
| 154 | +# are the same which doesn't necessarily mean the results are faster/slower but |
| 155 | +# it can be implied. |
def stat(baseline, current, groupBy):
    """Print a table comparing baseline vs. current times, grouped by the
    columns in *groupBy*.

    For each bucket a Welch's t-test is run; a bucket is labelled
    faster/slower only when p < 0.05 (i.e. the null hypothesis that the
    two runs are the same can be rejected).
    """
    labels = groupBy + ['Baseline(ms)', 'Current(ms)', '+/-', '%', 'Result(P<.05)']
    baselineGroup = group(baseline, groupBy)
    currentGroup = group(current, groupBy)
    rows = []
    for key in baselineGroup:
        # Guard against keys present only in the baseline run; the original
        # raised KeyError here when the runs covered different pdfs/pages.
        if key not in currentGroup:
            continue
        t, p = stats.ttest_ind(baselineGroup[key], currentGroup[key], equal_var = False)
        # Distinct names so we don't clobber the baseline/current parameters
        # (the original reassigned them inside the loop).
        baselineMean = mean(baselineGroup[key])
        currentMean = mean(currentGroup[key])
        speed = ''
        if p < 0.05:
            speed = 'faster' if currentMean < baselineMean else 'slower'
        row = list(key)
        row += [
            formatTime(baselineMean),
            formatTime(currentMean),
            formatTime(baselineMean - currentMean),
            round(100 * (1.0 * baselineMean - currentMean) / baselineMean, 2),
            speed
        ]
        rows.append(row)
    # Sort rows by their groupBy key columns for a stable, readable table.
    rows.sort(key=lambda row: tuple(row[0:len(groupBy)]))
    # Single-argument print() is valid on both Python 2 and 3.
    print(indent([labels] + rows, hasHeader=True))
| 179 | + |
def main():
    """Parse the command line, load the two stats JSON files, and print a
    comparison table for each requested grouping."""
    optionParser = TestOptions()
    options, args = optionParser.parse_args()
    options = optionParser.verifyOptions(options, args)
    if options is None:
        # Defensive: verifyOptions normally exits via error() rather than
        # returning None. Requires `import sys` at the top of the file —
        # the original referenced sys without importing it (NameError).
        sys.exit(1)

    with open(args[0]) as baselineFile:
        baseline = flatten(json.load(baselineFile))
    with open(args[1]) as currentFile:
        current = flatten(json.load(currentFile))

    for groupBy in options.groupBy:
        # Single-argument print() is valid on both Python 2 and 3.
        print("-- Grouped By " + ', '.join(groupBy) + ' -- ')
        stat(baseline, current, groupBy)
| 195 | + |
| 196 | + |
# Script entry point: compare two stats JSON files given on the command line.
if __name__ == '__main__':
    main()
0 commit comments