add documentation for discrete time optimal control problems

murrayrm · murrayrm · commit 522a8d742587 · 2023-09-15T21:24:54.000-07:00
diff --git a/control/optimal.py b/control/optimal.py
@@ -66,7 +66,7 @@ class OptimalControlProblem():
        `(fun, lb, ub)`.  The constraints will be applied at each time point
        along the trajectory.
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
+        Function that returns the terminal cost given the final state
         and input.  Called as terminal_cost(x, u).
     trajectory_method : string, optional
         Method to use for carrying out the optimization. Currently supported
@@ -287,12 +287,16 @@ def __init__(
     # time point and we use a trapezoidal approximation to compute the
     # integral cost, then add on the terminal cost.
     #
-    # For shooting methods, given the input U = [u[0], ... u[N]] we need to
+    # For shooting methods, given the input U = [u[t_0], ... u[t_N]] we need to
     # compute the cost of the trajectory generated by that input.  This
     # means we have to simulate the system to get the state trajectory X =
-    # [x[0], ..., x[N]] and then compute the cost at each point:
+    # [x[t_0], ..., x[t_N]] and then compute the cost at each point:
     #
-    #   cost = sum_k integral_cost(x[k], u[k]) + terminal_cost(x[N], u[N])
+    #   cost = sum_k integral_cost(x[t_k], u[t_k])
+    #          + terminal_cost(x[t_N], u[t_N])
+    #
+    # The actual calculation is a bit more complex: for continuous time
+    # systems, we use a trapezoidal approximation for the integral cost.
     #
     # The initial state used for generating the simulation is stored in the
     # class parameter `x` prior to calling the optimization algorithm.
@@ -325,8 +329,8 @@ def _cost_function(self, coeffs):
             # Sum the integral cost over the time (second) indices
             # cost += self.integral_cost(states[:,i], inputs[:,i])
             cost = sum(map(
-                self.integral_cost, np.transpose(states[:, :-1]),
-                np.transpose(inputs[:, :-1])))
+                self.integral_cost, states[:, :-1].transpose(),
+                inputs[:, :-1].transpose()))
 
         # Terminal cost
         if self.terminal_cost is not None:
@@ -954,7 +958,22 @@ def solve_ocp(
         transpose=None, return_states=True, print_summary=True, log=False,
         **kwargs):
 
-    """Compute the solution to an optimal control problem
+    """Compute the solution to an optimal control problem.
+
+    The optimal trajectory (states and inputs) is computed so as to
+    approximately minimize a cost function of the following form (for
+    continuous time systems):
+
+      J(x(.), u(.)) = \int_0^T L(x(t), u(t)) dt + V(x(T)),
+
+    where T is the time horizon.
+
+    Discrete time systems use a similar formulation, with the integral
+    replaced by a sum:
+
+      J(x[.], u[.]) = \sum_{k=0}^{N-1} L(x_k, u_k) + V(x_N),
+
+    where N is the time horizon (corresponding to timepts[-1]).
 
     Parameters
     ----------
@@ -968,7 +987,7 @@ def solve_ocp(
         Initial condition (default = 0).
 
     cost : callable
-        Function that returns the integral cost given the current state
+        Function that returns the integral cost (L) given the current state
         and input.  Called as `cost(x, u)`.
 
     trajectory_constraints : list of tuples, optional
@@ -990,8 +1009,10 @@ def solve_ocp(
         The constraints are applied at each time point along the trajectory.
 
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
-        and input.  Called as terminal_cost(x, u).
+        Function that returns the terminal cost (V) given the final state
+        and input.  Called as terminal_cost(x, u).  (For compatibility with
+        the form of the cost function, u is passed even though it is often
+        not part of the terminal cost.)
 
     terminal_constraints : list of tuples, optional
         List of constraints that should hold at the end of the trajectory.
@@ -1044,9 +1065,19 @@ def solve_ocp(
 
     Notes
     -----
-    Additional keyword parameters can be used to fine-tune the behavior of
-    the underlying optimization and integration functions.  See
-    :func:`OptimalControlProblem` for more information.
+    1. For discrete time systems, the final value of the timepts vector
+       specifies the final time t_N, and the trajectory cost is computed
+       from time t_0 to t_{N-1}.  Note that the input u_N does not affect
+       the state x_N and so it should always be returned as 0.  Further, if
+       neither a terminal cost nor a terminal constraint is given, then the
+       input at time point t_{N-1} does not affect the cost function and
+       hence u_{N-1} will also be returned as zero.  If you want the
+       trajectory cost to include state costs at time t_{N}, then you can
+       set `terminal_cost` to be the same function as `cost`.
+
+    2. Additional keyword parameters can be used to fine-tune the behavior
+       of the underlying optimization and integration functions.  See
+       :func:`OptimalControlProblem` for more information.
 
     """
     # Process keyword arguments
@@ -1116,7 +1147,7 @@ def create_mpc_iosystem(
         See :func:`~control.optimal.solve_ocp` for more details.
 
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
+        Function that returns the terminal cost given the final state
         and input.  Called as terminal_cost(x, u).
 
     terminal_constraints : list of tuples, optional
diff --git a/doc/optimal.rst b/doc/optimal.rst
@@ -65,6 +65,13 @@ can be on the input, the state, or combinations of input and state,
 depending on the form of :math:`g_i`.  Furthermore, these constraints are
 intended to hold at all instants in time along the trajectory.
 
+For a discrete time system, the same basic formulation applies except
+that the cost function is given by
+
+.. math::
+
+  J(x, u) = \sum_{k=0}^{N-1} L(x_k, u_k) + V \bigl( x_N \bigr).
+
 A common use of optimization-based control techniques is the implementation
 of model predictive control (also called receding horizon control).  In
 model predictive control, a finite horizon optimal control problem is solved,