SI-8999 Reduce memory usage in exhaustivity check

gbasler · adriaanm · commit 578c3b188a4a · 2014-12-12T13:56:11.000-08:00
The OOM could occur when all models are forcibly expanded in the DPLL solver.

The simplest solution would be to limit the number of returned models but then
we are back in non-determinism land (since the subset we get back depends on
which models were found first).

A better alternative is to create only the models that have corresponding
counter examples.

If this does not ring a bell, here's a longer explanation:

TThe models we get from the DPLL solver need to be mapped back to counter
examples. However there's no precalculated mapping model -&gt; counter example.
Even worse, not every valid model corresponds to a valid counter example.

The reason is that restricting the valid models further would for example
require a quadratic number of additional clauses. So to keep the optimistic case
fast (i.e., all cases are covered in a pattern match), the infeasible counter
examples are filtered later.

The DPLL procedure keeps the literals that do not contribute to the solution
unassigned, e.g., for  `(a \/ b)` only {a = true} or {b = true} is required and
the other variable can have any value. This function does a smart expansion of
the model and avoids models that have conflicting mappings.

For example for in case of the given set of symbols (taken from `t7020.scala`):
 "V2=2#16"
 "V2=6#19"
 "V2=5#18"
 "V2=4#17"
 "V2=7#20"

One possibility would be to group the symbols by domain but
this would only work for equality tests and would not be compatible
with type tests.

Another observation leads to a much simpler algorithm:
Only one of these symbols can be set to true,
since `V2` can at most be equal to one of {2,6,5,4,7}.

TODO: How does this fare with mixed TypeConst/ValueConst assignments?
When the assignments are e.g., V2=Int | V2=2 | V2=4, only the assignments
to value constants are mutually exclusive.
diff --git a/src/compiler/scala/tools/nsc/transform/patmat/Logic.scala b/src/compiler/scala/tools/nsc/transform/patmat/Logic.scala
@@ -370,9 +370,11 @@ trait Logic extends Debugging  {
     val EmptyModel: Model
     val NoModel: Model
 
+    final case class Solution(model: Model, unassigned: List[Sym])
+
     def findModelFor(solvable: Solvable): Model
 
-    def findAllModelsFor(solvable: Solvable): List[Model]
+    def findAllModelsFor(solvable: Solvable): List[Solution]
   }
 }
 
diff --git a/src/compiler/scala/tools/nsc/transform/patmat/MatchAnalysis.scala b/src/compiler/scala/tools/nsc/transform/patmat/MatchAnalysis.scala
@@ -6,6 +6,8 @@
 
 package scala.tools.nsc.transform.patmat
 
+import scala.annotation.tailrec
+import scala.collection.immutable.{IndexedSeq, Iterable}
 import scala.language.postfixOps
 import scala.collection.mutable
 import scala.reflect.internal.util.Statistics
@@ -514,8 +516,16 @@ trait MatchAnalysis extends MatchApproximation {
 
         // find the models (under which the match fails)
         val matchFailModels = findAllModelsFor(propToSolvable(matchFails))
+
         val scrutVar = Var(prevBinderTree)
-        val counterExamples = matchFailModels.flatMap(modelToCounterExample(scrutVar))
+        val counterExamples = {
+          matchFailModels.flatMap {
+            model =>
+              val varAssignments = expandModel(model)
+              varAssignments.flatMap(modelToCounterExample(scrutVar) _)
+          }
+        }
+
         // sorting before pruning is important here in order to
         // keep neg/t7020.scala stable
         // since e.g. List(_, _) would cover List(1, _)
@@ -587,6 +597,8 @@ trait MatchAnalysis extends MatchApproximation {
     case object WildcardExample extends CounterExample { override def toString = "_" }
     case object NoExample extends CounterExample { override def toString = "??" }
 
+    // returns a mapping from variable to
+    // equal and notEqual symbols
     def modelToVarAssignment(model: Model): Map[Var, (Seq[Const], Seq[Const])] =
       model.toSeq.groupBy{f => f match {case (sym, value) => sym.variable} }.mapValues{ xs =>
         val (trues, falses) = xs.partition(_._2)
@@ -600,20 +612,110 @@ trait MatchAnalysis extends MatchApproximation {
          v +"(="+ v.path +": "+ v.staticTpCheckable +") "+ assignment
        }.mkString("\n")
 
-    // return constructor call when the model is a true counter example
-    // (the variables don't take into account type information derived from other variables,
-    //  so, naively, you might try to construct a counter example like _ :: Nil(_ :: _, _ :: _),
-    //  since we didn't realize the tail of the outer cons was a Nil)
-    def modelToCounterExample(scrutVar: Var)(model: Model): Option[CounterExample] = {
+    /**
+     * The models we get from the DPLL solver need to be mapped back to counter examples.
+     * However there's no precalculated mapping model -> counter example. Even worse,
+     * not every valid model corresponds to a valid counter example.
+     * The reason is that restricting the valid models further would for example require
+     * a quadratic number of additional clauses. So to keep the optimistic case fast
+     * (i.e., all cases are covered in a pattern match), the infeasible counter examples
+     * are filtered later.
+     *
+     * The DPLL procedure keeps the literals that do not contribute to the solution
+     * unassigned, e.g., for  `(a \/ b)`
+     * only {a = true} or {b = true} is required and the other variable can have any value.
+     *
+     * This function does a smart expansion of the model and avoids models that
+     * have conflicting mappings.
+     *
+     * For example for in case of the given set of symbols (taken from `t7020.scala`):
+     *  "V2=2#16"
+     *  "V2=6#19"
+     *  "V2=5#18"
+     *  "V2=4#17"
+     *  "V2=7#20"
+     *
+     * One possibility would be to group the symbols by domain but
+     * this would only work for equality tests and would not be compatible
+     * with type tests.
+     * Another observation leads to a much simpler algorithm:
+     * Only one of these symbols can be set to true,
+     * since `V2` can at most be equal to one of {2,6,5,4,7}.
+     */
+    def expandModel(solution: Solution): List[Map[Var, (Seq[Const], Seq[Const])]] = {
+
+      val model = solution.model
+
       // x1 = ...
       // x1.hd = ...
       // x1.tl = ...
       // x1.hd.hd = ...
       // ...
       val varAssignment = modelToVarAssignment(model)
+      debug.patmat("var assignment for model " + model + ":\n" + varAssignmentString(varAssignment))
+
+      // group symbols that assign values to the same variables (i.e., symbols are mutually exclusive)
+      // (thus the groups are sets of disjoint assignments to variables)
+      val groupedByVar: Map[Var, List[Sym]] = solution.unassigned.groupBy(_.variable)
+
+      val expanded = for {
+        (variable, syms) <- groupedByVar.toList
+      } yield {
+
+        val (equal, notEqual) = varAssignment.getOrElse(variable, Nil -> Nil)
+
+        def addVarAssignment(equalTo: List[Const], notEqualTo: List[Const]) = {
+          Map(variable ->(equal ++ equalTo, notEqual ++ notEqualTo))
+        }
+
+        // this assignment is needed in case that
+        // there exists already an assign
+        val allNotEqual = addVarAssignment(Nil, syms.map(_.const))
 
-      debug.patmat("var assignment for model "+ model +":\n"+ varAssignmentString(varAssignment))
+        // this assignment is conflicting on purpose:
+        // a list counter example could contain wildcards: e.g. `List(_,_)`
+        val allEqual = addVarAssignment(syms.map(_.const), Nil)
 
+        if(equal.isEmpty) {
+          val oneHot = for {
+            s <- syms
+          } yield {
+            addVarAssignment(List(s.const), syms.filterNot(_ == s).map(_.const))
+          }
+          allEqual :: allNotEqual :: oneHot
+        } else {
+          allEqual :: allNotEqual :: Nil
+        }
+      }
+
+      if (expanded.isEmpty) {
+        List(varAssignment)
+      } else {
+        // we need the cartesian product here,
+        // since we want to report all missing cases
+        // (i.e., combinations)
+        val cartesianProd = expanded.reduceLeft((xs, ys) =>
+          for {map1 <- xs
+               map2 <- ys} yield {
+            map1 ++ map2
+          })
+
+        // add expanded variables
+        // note that we can just use `++`
+        // since the Maps have disjoint keySets
+        for {
+          m <- cartesianProd
+        } yield {
+          varAssignment ++ m
+        }
+      }
+    }
+
+    // return constructor call when the model is a true counter example
+    // (the variables don't take into account type information derived from other variables,
+    //  so, naively, you might try to construct a counter example like _ :: Nil(_ :: _, _ :: _),
+    //  since we didn't realize the tail of the outer cons was a Nil)
+    def modelToCounterExample(scrutVar: Var)(varAssignment: Map[Var, (Seq[Const], Seq[Const])]): Option[CounterExample] = {
       // chop a path into a list of symbols
       def chop(path: Tree): List[Symbol] = path match {
         case Ident(_) => List(path.symbol)
@@ -742,7 +844,7 @@ trait MatchAnalysis extends MatchApproximation {
               // then we can safely ignore these counter examples since we will eventually encounter
               // both counter examples separately
               case _ if inSameDomain => None
-                
+
               // not a valid counter-example, possibly since we have a definite type but there was a field mismatch
               // TODO: improve reasoning -- in the mean time, a false negative is better than an annoying false positive
               case _ => Some(NoExample)
diff --git a/src/compiler/scala/tools/nsc/transform/patmat/Solving.scala b/src/compiler/scala/tools/nsc/transform/patmat/Solving.scala
@@ -288,7 +288,7 @@ trait Solving extends Logic {
     val NoTseitinModel: TseitinModel = null
 
     // returns all solutions, if any (TODO: better infinite recursion backstop -- detect fixpoint??)
-    def findAllModelsFor(solvable: Solvable): List[Model] = {
+    def findAllModelsFor(solvable: Solvable): List[Solution] = {
       debug.patmat("find all models for\n"+ cnfString(solvable.cnf))
 
       // we must take all vars from non simplified formula
@@ -305,54 +305,12 @@ trait Solving extends Logic {
         relevantLits.map(lit => -lit)
       }
 
-      /**
-       * The DPLL procedure only returns a minimal mapping from literal to value
-       * such that the CNF formula is satisfied.
-       * E.g. for:
-       * `(a \/ b)`
-       * The DPLL procedure will find either {a = true} or {b = true}
-       * as solution.
-       *
-       * The expansion step will amend both solutions with the unassigned variable
-       * i.e., {a = true} will be expanded to {a = true, b = true} and {a = true, b = false}.
-       */
-      def expandUnassigned(unassigned: List[Int], model: TseitinModel): List[TseitinModel] = {
-        // the number of solutions is doubled for every unassigned variable
-        val expandedModels = 1 << unassigned.size
-        var current = mutable.ArrayBuffer[TseitinModel]()
-        var next = mutable.ArrayBuffer[TseitinModel]()
-        current.sizeHint(expandedModels)
-        next.sizeHint(expandedModels)
-
-        current += model
-
-        // we use double buffering:
-        // read from `current` and create a two models for each model in `next`
-        for {
-          s <- unassigned
-        } {
-          for {
-            model <- current
-          } {
-            def force(l: Lit) = model + l
-
-            next += force(Lit(s))
-            next += force(Lit(-s))
-          }
-
-          val tmp = current
-          current = next
-          next = tmp
-
-          next.clear()
-        }
-
-        current.toList
+      final case class TseitinSolution(model: TseitinModel, unassigned: List[Int]) {
+        def projectToSolution(symForVar: Map[Int, Sym]) = Solution(projectToModel(model, symForVar), unassigned map symForVar)
       }
-
       def findAllModels(clauses: Array[Clause],
-                        models: List[TseitinModel],
-                        recursionDepthAllowed: Int = global.settings.YpatmatExhaustdepth.value): List[TseitinModel]=
+                        models: List[TseitinSolution],
+                        recursionDepthAllowed: Int = global.settings.YpatmatExhaustdepth.value): List[TseitinSolution]=
         if (recursionDepthAllowed == 0) {
           val maxDPLLdepth = global.settings.YpatmatExhaustdepth.value
           reportWarning("(Exhaustivity analysis reached max recursion depth, not all missing cases are reported. " +
@@ -368,17 +326,15 @@ trait Solving extends Logic {
             val unassigned: List[Int] = (relevantVars -- model.map(lit => lit.variable)).toList
             debug.patmat("unassigned "+ unassigned +" in "+ model)
 
-            val forced = expandUnassigned(unassigned, model)
-            debug.patmat("forced "+ forced)
+            val solution = TseitinSolution(model, unassigned)
             val negated = negateModel(model)
-            findAllModels(clauses :+ negated, forced ++ models, recursionDepthAllowed - 1)
+            findAllModels(clauses :+ negated, solution :: models, recursionDepthAllowed - 1)
           }
           else models
         }
 
-      val tseitinModels: List[TseitinModel] = findAllModels(solvable.cnf, Nil)
-      val models: List[Model] = tseitinModels.map(projectToModel(_, solvable.symbolMapping.symForVar))
-      models
+      val tseitinSolutions = findAllModels(solvable.cnf, Nil)
+      tseitinSolutions.map(_.projectToSolution(solvable.symbolMapping.symForVar))
     }
 
     private def withLit(res: TseitinModel, l: Lit): TseitinModel = {
diff --git a/test/files/pos/t8999.flags b/test/files/pos/t8999.flags
@@ -0,0 +1 @@
+-nowarn
diff --git a/test/files/pos/t8999.scala b/test/files/pos/t8999.scala

Original file line number	Diff line number	Diff line change
`@@ -370,9 +370,11 @@ trait Logic extends Debugging {`
`370`	`370`	`val EmptyModel: Model`
`371`	`371`	`val NoModel: Model`
`372`	`372`
	`373`	`+ final case class Solution(model: Model, unassigned: List[Sym])`
	`374`	`+`
`373`	`375`	`def findModelFor(solvable: Solvable): Model`
`374`	`376`
`375`		`- def findAllModelsFor(solvable: Solvable): List[Model]`
	`377`	`+ def findAllModelsFor(solvable: Solvable): List[Solution]`
`376`	`378`	`}`
`377`	`379`	`}`
`378`	`380`