From 05bcd3260a2eefdc8a030adcdd850e75c23363b1 Mon Sep 17 00:00:00 2001
From: Matthew Brett <matthew.brett@gmail.com>
Date: Mon, 16 Jul 2012 20:32:54 -0700
Subject: [PATCH 1/2] RF: comments, renames for multi-run contrast example

JB and I working through explaining the example to ourselves.
---
 examples/formula/multi_session_contrast.py | 131 +++++++++++++--------
 1 file changed, 83 insertions(+), 48 deletions(-)

diff --git a/examples/formula/multi_session_contrast.py b/examples/formula/multi_session_contrast.py
index 74cb7fc513..8df1ed226e 100644
--- a/examples/formula/multi_session_contrast.py
+++ b/examples/formula/multi_session_contrast.py
@@ -1,6 +1,6 @@
 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
 # vi: set ft=python sts=4 ts=4 sw=4 et:
-""" This example is now out of date - please remind us to fix it
+""" Example of more than one run in the same model
 """
 import numpy as np
 import sympy
@@ -8,78 +8,113 @@
 from nipy.algorithms.statistics.api import Term, Formula, Factor
 from nipy.modalities.fmri import utils, hrf
 
-# hrf
-
+# hrf models.  These are just symbols, placeholders for the hrfs for the two
+# runs
 h1 = sympy.Function('hrf1')
 h2 = sympy.Function('hrf2')
-t = Term('t')
-
-# Session 1
 
-t1 = Term('t1')
-c11 = utils.events([3,7,10], f=h1); c11 = c11.subs(t, t1)
-c21 = utils.events([1,3,9], f=h1); c21 = c21.subs(t, t1)
-c31 = utils.events([2,4,8], f=h1); c31 = c31.subs(t, t1)
-d1 = utils.fourier_basis([0.3,0.5,0.7]); d1 = d1.subs(t, t1)
-tval1 = np.linspace(0,20,101)
+# Symbol for time in general.  The 'events' function below will return models in
+# terms of 't', but we'll want models in terms of 't1' and 't2'.  We need 't'
+# here so we can substitute.
+t = Term('t')
 
+# run 1
+t1 = Term('t1') # Time within run 1
+c11 = utils.events([3, 7, 10], f=h1) # Condition 1, run 1
+# The events utility returns a formula in terms of 't' - general time
+c11 = c11.subs(t, t1) # Now make it in terms of time in run 1
+# Same for conditions 2 and 3
+c21 = utils.events([1, 3, 9], f=h1); c21 = c21.subs(t, t1)
+c31 = utils.events([2, 4, 8], f=h1); c31 = c31.subs(t, t1)
+# Add also a fourier basis set for drift with frequencies 0.3, 0.5, 0.7
+d1 = utils.fourier_basis([0.3, 0.5, 0.7]); d1 = d1.subs(t, t1)
+
+# Here's our formula for run 1 signal terms of time in run 1 (t1)
 f1 = Formula([c11,c21,c31]) + d1
 
-# Session 2
-
-t2 = Term('t2')
-c12 = utils.events([3.3,7,10], f=h2); c12 = c12.subs(t, t2)
-c22 = utils.events([1,3.2,9], f=h2); c22 = c22.subs(t, t2)
-c32 = utils.events([2,4.2,8], f=h2); c32 = c32.subs(t, t2)
-d2 = utils.fourier_basis([0.3,0.5,0.7]); d2 = d2.subs(t, t2)
-tval2 = np.linspace(0,10,51)
-
-f2 = Formula([c12,c22,c32]) + d2
-
-sess_factor = Factor('sess', [1,2])
-
-# The multi session formula
-
-f = Formula([sess_factor.terms[0]]) * f1 + Formula([sess_factor.terms[1]]) * f2
-
-# Now, we evaluate the formula
-# the arrays now have 152=101+51 rows...
-
+# run 2
+t2 = Term('t2') # Time within run 2
+# Conditions 1 through 3 in run 2
+c12 = utils.events([3.3, 7, 10], f=h2); c12 = c12.subs(t, t2)
+c22 = utils.events([1, 3.2, 9], f=h2); c22 = c22.subs(t, t2)
+c32 = utils.events([2, 4.2, 8], f=h2); c32 = c32.subs(t, t2)
+d2 = utils.fourier_basis([0.3, 0.5, 0.7]); d2 = d2.subs(t, t2)
+
+# Formrla for run 2 signal in terms of time in run 2 (t2)
+f2 = Formula([c12, c22, c32]) + d2
+
+# Factor giving constant for run. The [1, 2] means that there are two levels to
+# this factor, and that when we get to pass in values for this factor,
+# instantiating an actual design matrix (see below), a value of 1 means level
+# 1 and a value of 2 means level 2.
+run_factor = Factor('run', [1, 2])
+run_1_coder = run_factor.get_term(1) # Term coding for level 1
+run_2_coder = run_factor.get_term(2) # Term coding for level 2
+
+# The multi run formula will combine the indicator (dummy value) terms from the
+# run factor with the formulae for the runs (which are functions of (run1, run2)
+# time. The run_factor terms are step functions that are zero when not in the
+# run, 1 when in the run.
+f = Formula([run_1_coder]) * f1 + Formula([run_2_coder]) * f2 + run_factor
+
+# Now, we evaluate the formula.  So far we've been entirely symbolic.  Now we
+# start to think about the values at which we want to evaluate our symbolic
+# formula.
+
+# We'll use these values for time within run 1.  The times are in seconds from
+# the beginning of run 1.  In our case run 1 was 20 seconds long. 101 below
+# gives 101 values from 0 to 20 including the endpoints, giving a dt of 0.2.
+tval1 = np.linspace(0, 20, 101)
+# run 2 lasts 10 seconds.  These are the times in terms of the start of run 2.
+tval2 = np.linspace(0, 10, 51)
+
+# We pad out the tval1 / tval2 time vectors with zeros corresponding to the
+# TRs in run 2 / run 1.
 ttval1 = np.hstack([tval1, np.zeros(tval2.shape)])
 ttval2 = np.hstack([np.zeros(tval1.shape), tval2])
-session = np.array([1]*tval1.shape[0] + [2]*tval2.shape[0])
+# The arrays above now have 152=101+51 rows...
 
+# Vector of run numbers for each time point (with values 1 or 2)
+run_no = np.array([1]*tval1.shape[0] + [2]*tval2.shape[0])
+
+# Put actual hrfs into the formulae
 f = f.subs(h1, hrf.glover)
 f = f.subs(h2, hrf.glover)
 
-# Create the recarray that will be used to create the design matrix
-
-rec = np.array([(t1,t2, s) for t1, t2, s in zip(ttval1, ttval2, session)],
+# Create the recarray that will be used to create the design matrix. The
+# recarray gives the actual values for the symbolic terms in the formulae.  In
+# our case the terms are t1, t2, and the (indicator coding) terms from the run
+# factor.
+rec = np.array([(tv1, tv2, s) for tv1, tv2, s in zip(ttval1, ttval2, run_no)],
                np.dtype([('t1', np.float),
                          ('t2', np.float),
-                         ('sess', np.int)]))
+                         ('run', np.int)]))
 
 # The contrast we care about
 
-# It would be a good idea to be able to create
-# the contrast from the Formula, "f" above,
-# applying all of f's aliases to it....
-
-sess_1 = sess_factor.get_term(1)
-sess_2 = sess_factor.get_term(2)
-contrast = Formula([sess_1*c11-sess_2*c12])
+# Note to selves: it would be a good idea to be able to create the contrast from
+# the Formula, "f" above, applying all of f's aliases to it...  But we haven't
+# implemented that yet.
+contrast = Formula([run_1_coder * c11 - run_2_coder * c12])
 contrast = contrast.subs(h1, hrf.glover)
 contrast = contrast.subs(h2, hrf.glover)
 
 # # Create the design matrix
-
 X = f.design(rec, return_float=True)
 
+# Show ourselves the design space covered by the contrast, and the corresponding
+# contrast matrix
 preC = contrast.design(rec, return_float=True)
-
+# C is the matrix such that preC = X.dot(C.T)
 C = np.dot(np.linalg.pinv(X), preC)
 print C
 
-nonzero = np.nonzero(np.fabs(C) >= 0.1)[0]
+# We can also get this by passing the contrast into the design creation.
+X, c = f.design(rec, return_float=True, contrasts=dict(C=contrast))
+assert np.allclose(C, c['C'])
+
+# Show the names of the non-trivial elements of the contrast
+nonzero = np.nonzero(np.fabs(C) >= 1e-5)[0]
 print (f.dtype.names[nonzero[0]], f.dtype.names[nonzero[1]])
-print ((sess_2*c12).subs(h2, hrf.glover), (sess_1*c11).subs(h1, hrf.glover))
+print ((run_2_coder * c12).subs(h2, hrf.glover),
+       (run_1_coder * c11).subs(h1, hrf.glover))

From ab51407cbca18c8c7f5c2e64c14b70ee663b0475 Mon Sep 17 00:00:00 2001
From: Matthew Brett <matthew.brett@gmail.com>
Date: Tue, 17 Jul 2012 12:20:28 -0700
Subject: [PATCH 2/2] RF: removing hrf substitution

It was a little confusing and it wasn't obvious (to me) what it was
achieving.
---
 examples/formula/multi_session_contrast.py | 24 ++++++----------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/examples/formula/multi_session_contrast.py b/examples/formula/multi_session_contrast.py
index 8df1ed226e..a85150c4df 100644
--- a/examples/formula/multi_session_contrast.py
+++ b/examples/formula/multi_session_contrast.py
@@ -3,15 +3,14 @@
 """ Example of more than one run in the same model
 """
 import numpy as np
-import sympy
 
 from nipy.algorithms.statistics.api import Term, Formula, Factor
 from nipy.modalities.fmri import utils, hrf
 
-# hrf models.  These are just symbols, placeholders for the hrfs for the two
-# runs
-h1 = sympy.Function('hrf1')
-h2 = sympy.Function('hrf2')
+# hrf models we will use for each run.  Just to show it can be done, use a
+# different hrf model for each run
+h1 = hrf.glover
+h2 = hrf.afni
 
 # Symbol for time in general.  The 'events' function below will return models in
 # terms of 't', but we'll want models in terms of 't1' and 't2'.  We need 't'
@@ -40,7 +39,7 @@
 c32 = utils.events([2, 4.2, 8], f=h2); c32 = c32.subs(t, t2)
 d2 = utils.fourier_basis([0.3, 0.5, 0.7]); d2 = d2.subs(t, t2)
 
-# Formrla for run 2 signal in terms of time in run 2 (t2)
+# Formula for run 2 signal in terms of time in run 2 (t2)
 f2 = Formula([c12, c22, c32]) + d2
 
 # Factor giving constant for run. The [1, 2] means that there are two levels to
@@ -77,10 +76,6 @@
 # Vector of run numbers for each time point (with values 1 or 2)
 run_no = np.array([1]*tval1.shape[0] + [2]*tval2.shape[0])
 
-# Put actual hrfs into the formulae
-f = f.subs(h1, hrf.glover)
-f = f.subs(h2, hrf.glover)
-
 # Create the recarray that will be used to create the design matrix. The
 # recarray gives the actual values for the symbolic terms in the formulae.  In
 # our case the terms are t1, t2, and the (indicator coding) terms from the run
@@ -91,13 +86,7 @@
                          ('run', np.int)]))
 
 # The contrast we care about
-
-# Note to selves: it would be a good idea to be able to create the contrast from
-# the Formula, "f" above, applying all of f's aliases to it...  But we haven't
-# implemented that yet.
 contrast = Formula([run_1_coder * c11 - run_2_coder * c12])
-contrast = contrast.subs(h1, hrf.glover)
-contrast = contrast.subs(h2, hrf.glover)
 
 # # Create the design matrix
 X = f.design(rec, return_float=True)
@@ -116,5 +105,4 @@
 # Show the names of the non-trivial elements of the contrast
 nonzero = np.nonzero(np.fabs(C) >= 1e-5)[0]
 print (f.dtype.names[nonzero[0]], f.dtype.names[nonzero[1]])
-print ((run_2_coder * c12).subs(h2, hrf.glover),
-       (run_1_coder * c11).subs(h1, hrf.glover))
+print ((run_1_coder * c11), (run_2_coder * c12))