From 7a681436a7d64801ae86dc40efa06e265ecc34a7 Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Sun, 15 Oct 2023 13:12:31 -0500
Subject: [PATCH 1/7] Add docstrings for using SS JIT, and make better

Also:
- allow a SS JIT function to be registered under the same name with different input types
- be extra-strict about input types for SS JIT
- test numba JIT with select with udt
---
 .codecov.yml                          |   2 +-
 graphblas/core/operator/base.py       |  15 ++-
 graphblas/core/operator/binary.py     |   2 +
 graphblas/core/operator/indexunary.py |   7 ++
 graphblas/core/operator/select.py     |  57 ++++++++++-
 graphblas/core/operator/unary.py      |   2 +
 graphblas/core/ss/binary.py           |  63 +++++++++++-
 graphblas/core/ss/indexunary.py       |  89 +++++++++++++++--
 graphblas/core/ss/select.py           |  43 ++++++++
 graphblas/core/ss/unary.py            |  54 +++++++++-
 graphblas/tests/test_op.py            |  15 ++-
 graphblas/tests/test_ssjit.py         | 136 ++++++++++++++++++++++++--
 12 files changed, 452 insertions(+), 33 deletions(-)

diff --git a/.codecov.yml b/.codecov.yml
index 7a57a7568..5371c00ad 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -7,6 +7,6 @@ coverage:
       default:
         informational: true
     changes: false
-comment: off
+comment: on
 ignore:
   - graphblas/viz.py
diff --git a/graphblas/core/operator/base.py b/graphblas/core/operator/base.py
index cddee6a33..0e79385fc 100644
--- a/graphblas/core/operator/base.py
+++ b/graphblas/core/operator/base.py
@@ -342,9 +342,18 @@ def __getitem__(self, type_):
         dtype = lookup_dtype(type_)
         return self._compile_udt(dtype, dtype)
 
-    def _add(self, op):
-        self._typed_ops[op.type] = op
-        self.types[op.type] = op.return_type
+    def _add(self, op, *, is_jit=False):
+        if is_jit:
+            if hasattr(op, "type2") or hasattr(op, "thunk_type"):
+                dtypes = (op.type, op._type2)
+            else:
+                dtypes = op.type
+            self.types[dtypes] = op.return_type  # This is a different use of .types
+            self._udt_types[dtypes] = op.return_type
+            self._udt_ops[dtypes] = op
+        else:
+            self._typed_ops[op.type] = op
+            self.types[op.type] = op.return_type
 
     def __delitem__(self, type_):
         type_ = lookup_dtype(type_)
diff --git a/graphblas/core/operator/binary.py b/graphblas/core/operator/binary.py
index 77a686868..0b229d1a0 100644
--- a/graphblas/core/operator/binary.py
+++ b/graphblas/core/operator/binary.py
@@ -597,6 +597,8 @@ def binary_wrapper(z_ptr, x_ptr, y_ptr):  # pragma: no cover (numba)
                     #     z_ptr[0] = False
                     z_ptr[0] = (x[mask] != y[mask]).any()
 
+        elif self._numba_func is None:
+            raise KeyError(f"{self.name} does not work with {dtypes} types")
         else:
             numba_func = self._numba_func
             sig = (dtype.numba_type, dtype2.numba_type)
diff --git a/graphblas/core/operator/indexunary.py b/graphblas/core/operator/indexunary.py
index b5351e916..b6fc74e91 100644
--- a/graphblas/core/operator/indexunary.py
+++ b/graphblas/core/operator/indexunary.py
@@ -25,6 +25,10 @@ def __call__(self, val, thunk=None):
             thunk = False  # most basic form of 0 when unifying dtypes
         return _call_op(self, val, right=thunk)
 
+    @property
+    def thunk_type(self):
+        return self.type if self._type2 is None else self._type2
+
 
 class TypedUserIndexUnaryOp(TypedOpBase):
     __slots__ = ()
@@ -41,6 +45,7 @@ def orig_func(self):
     def _numba_func(self):
         return self.parent._numba_func
 
+    thunk_type = TypedBuiltinIndexUnaryOp.thunk_type
     __call__ = TypedBuiltinIndexUnaryOp.__call__
 
 
@@ -210,6 +215,8 @@ def _compile_udt(self, dtype, dtype2):
         dtypes = (dtype, dtype2)
         if dtypes in self._udt_types:
             return self._udt_ops[dtypes]
+        if self._numba_func is None:
+            raise KeyError(f"{self.name} does not work with {dtypes} types")
 
         numba_func = self._numba_func
         sig = (dtype.numba_type, UINT64.numba_type, UINT64.numba_type, dtype2.numba_type)
diff --git a/graphblas/core/operator/select.py b/graphblas/core/operator/select.py
index 4c9cd4639..4dd65ef16 100644
--- a/graphblas/core/operator/select.py
+++ b/graphblas/core/operator/select.py
@@ -1,9 +1,17 @@
 import inspect
 
 from ... import _STANDARD_OPERATOR_NAMES, select
-from ...dtypes import BOOL
+from ...dtypes import BOOL, UINT64
+from ...exceptions import check_status_carg
+from .. import _has_numba, ffi, lib
 from .base import OpBase, ParameterizedUdf, TypedOpBase, _call_op, _deserialize_parameterized
-from .indexunary import IndexUnaryOp
+from .indexunary import IndexUnaryOp, TypedBuiltinIndexUnaryOp
+
+if _has_numba:
+    import numba
+
+    from .base import _get_udt_wrapper
+ffi_new = ffi.new
 
 
 class TypedBuiltinSelectOp(TypedOpBase):
@@ -15,13 +23,15 @@ def __call__(self, val, thunk=None):
             thunk = False  # most basic form of 0 when unifying dtypes
         return _call_op(self, val, thunk=thunk)
 
+    thunk_type = TypedBuiltinIndexUnaryOp.thunk_type
+
 
 class TypedUserSelectOp(TypedOpBase):
     __slots__ = ()
     opclass = "SelectOp"
 
-    def __init__(self, parent, name, type_, return_type, gb_obj):
-        super().__init__(parent, name, type_, return_type, gb_obj, f"{name}_{type_}")
+    def __init__(self, parent, name, type_, return_type, gb_obj, dtype2=None):
+        super().__init__(parent, name, type_, return_type, gb_obj, f"{name}_{type_}", dtype2=dtype2)
 
     @property
     def orig_func(self):
@@ -31,6 +41,7 @@ def orig_func(self):
     def _numba_func(self):
         return self.parent._numba_func
 
+    thunk_type = TypedBuiltinSelectOp.thunk_type
     __call__ = TypedBuiltinSelectOp.__call__
 
 
@@ -120,6 +131,44 @@ def _from_indexunary(cls, iop):
             obj.types[type_] = op.return_type
         return obj
 
+    def _compile_udt(self, dtype, dtype2):
+        if dtype2 is None:  # pragma: no cover
+            dtype2 = dtype
+        dtypes = (dtype, dtype2)
+        if dtypes in self._udt_types:
+            return self._udt_ops[dtypes]
+        if self._numba_func is None:
+            raise KeyError(f"{self.name} does not work with {dtypes} types")
+
+        # It would be nice if we could reuse compiling done for IndexUnaryOp
+        numba_func = self._numba_func
+        sig = (dtype.numba_type, UINT64.numba_type, UINT64.numba_type, dtype2.numba_type)
+        numba_func.compile(sig)  # Should we catch and give additional error message?
+        select_wrapper, wrapper_sig = _get_udt_wrapper(
+            numba_func, BOOL, dtype, dtype2, include_indexes=True
+        )
+
+        select_wrapper = numba.cfunc(wrapper_sig, nopython=True)(select_wrapper)
+        new_select = ffi_new("GrB_IndexUnaryOp*")
+        check_status_carg(
+            lib.GrB_IndexUnaryOp_new(
+                new_select, select_wrapper.cffi, BOOL._carg, dtype._carg, dtype2._carg
+            ),
+            "IndexUnaryOp",
+            new_select[0],
+        )
+        op = TypedUserSelectOp(
+            self,
+            self.name,
+            dtype,
+            BOOL,
+            new_select[0],
+            dtype2=dtype2,
+        )
+        self._udt_types[dtypes] = BOOL
+        self._udt_ops[dtypes] = op
+        return op
+
     @classmethod
     def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=False):
         """Register a SelectOp without registering it in the ``graphblas.select`` namespace.
diff --git a/graphblas/core/operator/unary.py b/graphblas/core/operator/unary.py
index 437334ccc..7484f74d9 100644
--- a/graphblas/core/operator/unary.py
+++ b/graphblas/core/operator/unary.py
@@ -252,6 +252,8 @@ def unary_wrapper(z, x):
     def _compile_udt(self, dtype, dtype2):
         if dtype in self._udt_types:
             return self._udt_ops[dtype]
+        if self._numba_func is None:
+            raise KeyError(f"{self.name} does not work with {dtype}")
 
         numba_func = self._numba_func
         sig = (dtype.numba_type,)
diff --git a/graphblas/core/ss/binary.py b/graphblas/core/ss/binary.py
index 898257fac..6965aeaf1 100644
--- a/graphblas/core/ss/binary.py
+++ b/graphblas/core/ss/binary.py
@@ -31,6 +31,47 @@ def jit_c_definition(self):
 
 
 def register_new(name, jit_c_definition, left_type, right_type, ret_type):
+    """Register a new BinaryOp using the SuiteSparse:GraphBLAS JIT compiler.
+
+    This creates a BinaryOp by compiling the C string definition of the function.
+    It requires a shell call to a C compiler. The resulting operator will be as
+    fast as if it were built-in to SuiteSparse:GraphBLAS and does not have the
+    overhead of additional function calls as when using ``gb.binary.register_new``.
+
+    This is an advanced feature that requires a C compiler and proper configuration.
+    Configuration is handled by ``gb.ss.config``; see its docstring for details.
+    By default, the JIT caches results in ``~/.SuiteSparse/``. For more information,
+    see the SuiteSparse:GraphBLAS user guide.
+
+    Only one type signature may be registered at a time, but repeated calls using
+    the same name with different input types is allowed.
+
+    Parameters
+    ----------
+    name : str
+        The name of the operator. This will show up as ``gb.binary.ss.{name}``.
+        The name may contain periods, ".", which will result in nested objects
+        such as ``gb.binary.ss.x.y.z`` for name ``"x.y.z"``.
+    jit_c_definition : str
+        The C definition as a string of the user-defined function. For example:
+        ``"void absdiff (double *z, double *x, double *y) { (*z) = fabs ((*x) - (*y)) ; }"``.
+    left_type : dtype
+        The dtype of the left operand of the binary operator.
+    right_type : dtype
+        The dtype of the right operand of the binary operator.
+    ret_type : dtype
+        The dtype of the result of the binary operator.
+
+    Returns
+    -------
+    BinaryOp
+
+    See Also
+    --------
+    gb.binary.register_new
+    gb.binary.register_anonymous
+    gb.unary.ss.register_new
+    """
     if backend != "suitesparse":  # pragma: no cover (safety)
         raise RuntimeError(
             "`gb.binary.ss.register_new` invalid when not using 'suitesparse' backend"
@@ -47,9 +88,23 @@ def register_new(name, jit_c_definition, left_type, right_type, ret_type):
     right_type = lookup_dtype(right_type)
     ret_type = lookup_dtype(ret_type)
     name = name if name.startswith("ss.") else f"ss.{name}"
-    module, funcname = BinaryOp._remove_nesting(name)
-
-    rv = BinaryOp(name)
+    module, funcname = BinaryOp._remove_nesting(name, strict=False)
+    if hasattr(module, funcname):
+        rv = getattr(module, funcname)
+        if not isinstance(rv, BinaryOp):
+            BinaryOp._remove_nesting(name)
+        if (
+            (left_type, right_type) in rv.types
+            or rv._udt_types is not None
+            and (left_type, right_type) in rv._udt_types
+        ):
+            raise TypeError(
+                f"BinaryOp gb.binary.{name} already defined for "
+                f"({left_type}, {right_type}) input types"
+            )
+    else:
+        # We use `is_udt=True` to make dtype handling flexible and explicit.
+        rv = BinaryOp(name, is_udt=True)
     gb_obj = ffi_new("GrB_BinaryOp*")
     check_status_carg(
         lib.GxB_BinaryOp_new(
@@ -67,6 +122,6 @@ def register_new(name, jit_c_definition, left_type, right_type, ret_type):
     op = TypedJitBinaryOp(
         rv, funcname, left_type, ret_type, gb_obj[0], jit_c_definition, dtype2=right_type
     )
-    rv._add(op)
+    rv._add(op, is_jit=True)
     setattr(module, funcname, rv)
     return rv
diff --git a/graphblas/core/ss/indexunary.py b/graphblas/core/ss/indexunary.py
index c0f185737..d5f709526 100644
--- a/graphblas/core/ss/indexunary.py
+++ b/graphblas/core/ss/indexunary.py
@@ -21,10 +21,56 @@ def __init__(self, parent, name, type_, return_type, gb_obj, jit_c_definition, d
     def jit_c_definition(self):
         return self._jit_c_definition
 
+    thunk_type = TypedUserIndexUnaryOp.thunk_type
     __call__ = TypedUserIndexUnaryOp.__call__
 
 
 def register_new(name, jit_c_definition, input_type, thunk_type, ret_type):
+    """Register a new IndexUnaryOp using the SuiteSparse:GraphBLAS JIT compiler.
+
+    This creates a IndexUnaryOp by compiling the C string definition of the function.
+    It requires a shell call to a C compiler. The resulting operator will be as
+    fast as if it were built-in to SuiteSparse:GraphBLAS and does not have the
+    overhead of additional function calls as when using ``gb.indexunary.register_new``.
+
+    This is an advanced feature that requires a C compiler and proper configuration.
+    Configuration is handled by ``gb.ss.config``; see its docstring for details.
+    By default, the JIT caches results in ``~/.SuiteSparse/``. For more information,
+    see the SuiteSparse:GraphBLAS user guide.
+
+    Only one type signature may be registered at a time, but repeated calls using
+    the same name with different input types is allowed.
+
+    This will also create a SelectOp operator under ``gb.select.ss`` if the return
+    type is boolean.
+
+    Parameters
+    ----------
+    name : str
+        The name of the operator. This will show up as ``gb.indexunary.ss.{name}``.
+        The name may contain periods, ".", which will result in nested objects
+        such as ``gb.indexunary.ss.x.y.z`` for name ``"x.y.z"``.
+    jit_c_definition : str
+        The C definition as a string of the user-defined function. For example:
+        ``"void diffy (double *z, double *x, GrB_Index i, GrB_Index j, double *y) "``
+        ``"{ (*z) = (i + j) * fabs ((*x) - (*y)) ; }"``
+    input_type : dtype
+        The dtype of the operand of the indexunary operator.
+    thunk_type : dtype
+        The dtype of the thunk of the indexunary operator.
+    ret_type : dtype
+        The dtype of the result of the indexunary operator.
+
+    Returns
+    -------
+    IndexUnaryOp
+
+    See Also
+    --------
+    gb.indexunary.register_new
+    gb.indexunary.register_anonymous
+    gb.select.ss.register_new
+    """
     if backend != "suitesparse":  # pragma: no cover (safety)
         raise RuntimeError(
             "`gb.indexunary.ss.register_new` invalid when not using 'suitesparse' backend"
@@ -41,9 +87,23 @@ def register_new(name, jit_c_definition, input_type, thunk_type, ret_type):
     thunk_type = lookup_dtype(thunk_type)
     ret_type = lookup_dtype(ret_type)
     name = name if name.startswith("ss.") else f"ss.{name}"
-    module, funcname = IndexUnaryOp._remove_nesting(name)
-
-    rv = IndexUnaryOp(name)
+    module, funcname = IndexUnaryOp._remove_nesting(name, strict=False)
+    if hasattr(module, funcname):
+        rv = getattr(module, funcname)
+        if not isinstance(rv, IndexUnaryOp):
+            IndexUnaryOp._remove_nesting(name)
+        if (
+            (input_type, thunk_type) in rv.types
+            or rv._udt_types is not None
+            and (input_type, thunk_type) in rv._udt_types
+        ):
+            raise TypeError(
+                f"IndexUnaryOp gb.indexunary.{name} already defined for "
+                f"({input_type}, {thunk_type}) input types"
+            )
+    else:
+        # We use `is_udt=True` to make dtype handling flexible and explicit.
+        rv = IndexUnaryOp(name, is_udt=True)
     gb_obj = ffi_new("GrB_IndexUnaryOp*")
     check_status_carg(
         lib.GxB_IndexUnaryOp_new(
@@ -61,17 +121,32 @@ def register_new(name, jit_c_definition, input_type, thunk_type, ret_type):
     op = TypedJitIndexUnaryOp(
         rv, funcname, input_type, ret_type, gb_obj[0], jit_c_definition, dtype2=thunk_type
     )
-    rv._add(op)
+    rv._add(op, is_jit=True)
     if ret_type == BOOL:
         from ..operator.select import SelectOp
         from .select import TypedJitSelectOp
 
         select_module, funcname = SelectOp._remove_nesting(name, strict=False)
-        selectop = SelectOp(name)
+        if hasattr(select_module, funcname):
+            selectop = getattr(select_module, funcname)
+            if not isinstance(selectop, SelectOp):
+                SelectOp._remove_nesting(name)
+            if (
+                (input_type, thunk_type) in selectop.types
+                or selectop._udt_types is not None
+                and (input_type, thunk_type) in selectop._udt_types
+            ):
+                raise TypeError(
+                    f"SelectOp gb.select.{name} already defined for "
+                    f"({input_type}, {thunk_type}) input types"
+                )
+        else:
+            # We use `is_udt=True` to make dtype handling flexible and explicit.
+            selectop = SelectOp(name, is_udt=True)
         op2 = TypedJitSelectOp(
-            rv, funcname, input_type, ret_type, gb_obj[0], jit_c_definition, dtype2=thunk_type
+            selectop, funcname, input_type, ret_type, gb_obj[0], jit_c_definition, dtype2=thunk_type
         )
-        selectop._add(op2)
+        selectop._add(op2, is_jit=True)
         setattr(select_module, funcname, selectop)
     setattr(module, funcname, rv)
     return rv
diff --git a/graphblas/core/ss/select.py b/graphblas/core/ss/select.py
index 37c352b67..ff12f80fa 100644
--- a/graphblas/core/ss/select.py
+++ b/graphblas/core/ss/select.py
@@ -20,10 +20,53 @@ def __init__(self, parent, name, type_, return_type, gb_obj, jit_c_definition, d
     def jit_c_definition(self):
         return self._jit_c_definition
 
+    thunk_type = TypedUserSelectOp.thunk_type
     __call__ = TypedUserSelectOp.__call__
 
 
 def register_new(name, jit_c_definition, input_type, thunk_type):
+    """Register a new SelectOp using the SuiteSparse:GraphBLAS JIT compiler.
+
+    This creates a SelectOp by compiling the C string definition of the function.
+    It requires a shell call to a C compiler. The resulting operator will be as
+    fast as if it were built-in to SuiteSparse:GraphBLAS and does not have the
+    overhead of additional function calls as when using ``gb.select.register_new``.
+
+    This is an advanced feature that requires a C compiler and proper configuration.
+    Configuration is handled by ``gb.ss.config``; see its docstring for details.
+    By default, the JIT caches results in ``~/.SuiteSparse/``. For more information,
+    see the SuiteSparse:GraphBLAS user guide.
+
+    Only one type signature may be registered at a time, but repeated calls using
+    the same name with different input types is allowed.
+
+    This will also create an IndexUnary operator under ``gb.indexunary.ss``
+
+    Parameters
+    ----------
+    name : str
+        The name of the operator. This will show up as ``gb.select.ss.{name}``.
+        The name may contain periods, ".", which will result in nested objects
+        such as ``gb.select.ss.x.y.z`` for name ``"x.y.z"``.
+    jit_c_definition : str
+        The C definition as a string of the user-defined function. For example:
+        ``"void woot (bool *z, const int32_t *x, GrB_Index i, GrB_Index j, int32_t *y) "``
+        ``"{ (*z) = ((*x) + i + j == (*y)) ; }"``
+    input_type : dtype
+        The dtype of the operand of the select operator.
+    thunk_type : dtype
+        The dtype of the thunk of the select operator.
+
+    Returns
+    -------
+    SelectOp
+
+    See Also
+    --------
+    gb.select.register_new
+    gb.select.register_anonymous
+    gb.indexunary.ss.register_new
+    """
     if backend != "suitesparse":  # pragma: no cover (safety)
         raise RuntimeError(
             "`gb.select.ss.register_new` invalid when not using 'suitesparse' backend"
diff --git a/graphblas/core/ss/unary.py b/graphblas/core/ss/unary.py
index 97c4614c0..5a5c63632 100644
--- a/graphblas/core/ss/unary.py
+++ b/graphblas/core/ss/unary.py
@@ -25,6 +25,45 @@ def jit_c_definition(self):
 
 
 def register_new(name, jit_c_definition, input_type, ret_type):
+    """Register a new UnaryOp using the SuiteSparse:GraphBLAS JIT compiler.
+
+    This creates a UnaryOp by compiling the C string definition of the function.
+    It requires a shell call to a C compiler. The resulting operator will be as
+    fast as if it were built-in to SuiteSparse:GraphBLAS and does not have the
+    overhead of additional function calls as when using ``gb.unary.register_new``.
+
+    This is an advanced feature that requires a C compiler and proper configuration.
+    Configuration is handled by ``gb.ss.config``; see its docstring for details.
+    By default, the JIT caches results in ``~/.SuiteSparse/``. For more information,
+    see the SuiteSparse:GraphBLAS user guide.
+
+    Only one type signature may be registered at a time, but repeated calls using
+    the same name with different input types is allowed.
+
+    Parameters
+    ----------
+    name : str
+        The name of the operator. This will show up as ``gb.unary.ss.{name}``.
+        The name may contain periods, ".", which will result in nested objects
+        such as ``gb.unary.ss.x.y.z`` for name ``"x.y.z"``.
+    jit_c_definition : str
+        The C definition as a string of the user-defined function. For example:
+        ``"void square (float *z, float *x) { (*z) = (*x) * (*x) ; } ;"``
+    input_type : dtype
+        The dtype of the operand of the unary operator.
+    ret_type : dtype
+        The dtype of the result of the unary operator.
+
+    Returns
+    -------
+    UnaryOp
+
+    See Also
+    --------
+    gb.unary.register_new
+    gb.unary.register_anonymous
+    gb.binary.ss.register_new
+    """
     if backend != "suitesparse":  # pragma: no cover (safety)
         raise RuntimeError(
             "`gb.unary.ss.register_new` invalid when not using 'suitesparse' backend"
@@ -40,9 +79,16 @@ def register_new(name, jit_c_definition, input_type, ret_type):
     input_type = lookup_dtype(input_type)
     ret_type = lookup_dtype(ret_type)
     name = name if name.startswith("ss.") else f"ss.{name}"
-    module, funcname = UnaryOp._remove_nesting(name)
-
-    rv = UnaryOp(name)
+    module, funcname = UnaryOp._remove_nesting(name, strict=False)
+    if hasattr(module, funcname):
+        rv = getattr(module, funcname)
+        if not isinstance(rv, UnaryOp):
+            UnaryOp._remove_nesting(name)
+        if input_type in rv.types or rv._udt_types is not None and input_type in rv._udt_types:
+            raise TypeError(f"UnaryOp gb.unary.{name} already defined for {input_type} input type")
+    else:
+        # We use `is_udt=True` to make dtype handling flexible and explicit.
+        rv = UnaryOp(name, is_udt=True)
     gb_obj = ffi_new("GrB_UnaryOp*")
     check_status_carg(
         lib.GxB_UnaryOp_new(
@@ -57,6 +103,6 @@ def register_new(name, jit_c_definition, input_type, ret_type):
         gb_obj[0],
     )
     op = TypedJitUnaryOp(rv, funcname, input_type, ret_type, gb_obj[0], jit_c_definition)
-    rv._add(op)
+    rv._add(op, is_jit=True)
     setattr(module, funcname, rv)
     return rv
diff --git a/graphblas/tests/test_op.py b/graphblas/tests/test_op.py
index b54ea76c4..39ae7e9c0 100644
--- a/graphblas/tests/test_op.py
+++ b/graphblas/tests/test_op.py
@@ -19,7 +19,7 @@
 )
 from graphblas.core import _supports_udfs as supports_udfs
 from graphblas.core import lib, operator
-from graphblas.core.operator import BinaryOp, IndexUnaryOp, Monoid, Semiring, UnaryOp, get_semiring
+from graphblas.core.operator import BinaryOp, IndexUnaryOp, Monoid, Semiring, UnaryOp, get_semiring, SelectOp
 from graphblas.dtypes import (
     BOOL,
     FP32,
@@ -1336,6 +1336,19 @@ def badfunc2(x, y):  # pragma: no cover (numba)
     assert binary.first[udt, dtypes.INT8].type2 is dtypes.INT8
     assert monoid.any[udt].type2 is udt
 
+    def _this_or_that(val, idx, _, thunk):  # pragma: no cover (numba)
+        return val["x"]
+
+    sel = SelectOp.register_anonymous(_this_or_that, is_udt=True)
+    sel[udt]
+    assert udt in sel
+    result = v.select(sel, 0).new()
+    assert result.nvals == 0
+    assert result.dtype == v.dtype
+    result = w.select(sel, 0).new()
+    assert result.nvals == 3
+    assert result.isequal(w)
+
 
 def test_dir():
     for mod in [unary, binary, monoid, semiring, op]:
diff --git a/graphblas/tests/test_ssjit.py b/graphblas/tests/test_ssjit.py
index bd05cf2db..3c974c50d 100644
--- a/graphblas/tests/test_ssjit.py
+++ b/graphblas/tests/test_ssjit.py
@@ -165,6 +165,20 @@ def test_jit_unary(v):
     expected = Vector.from_coo([1, 3, 4, 6], [1, 1, 4, 0], dtype="FP32")
     assert expected.isequal(v)
     assert square["FP32"].jit_c_definition == cdef
+    assert "FP64" not in square
+    with burble():
+        square_fp64 = unary.ss.register_new(
+            "square", cdef.replace("float", "double"), "FP64", "FP64"
+        )
+    assert square_fp64 is square
+    assert "FP64" in square
+    with pytest.raises(
+        TypeError, match="UnaryOp gb.unary.ss.square already defined for FP32 input type"
+    ):
+        unary.ss.register_new("square", cdef, "FP32", "FP32")
+    unary.ss.register_new("nested.square", cdef, "FP32", "FP32")
+    with pytest.raises(AttributeError, match="nested is already defined"):
+        unary.ss.register_new("nested", cdef, "FP32", "FP32")
 
 
 def test_jit_binary(v):
@@ -186,9 +200,11 @@ def test_jit_binary(v):
     assert not hasattr(binary, "absdiff")
     assert binary.ss.absdiff is absdiff
     assert absdiff.name == "ss.absdiff"
-    assert absdiff.types == {dtypes.FP64: dtypes.FP64}
+    assert absdiff.types == {(dtypes.FP64, dtypes.FP64): dtypes.FP64}  # different than normal
+    assert "FP64" in absdiff
+    assert absdiff["FP64"].return_type == dtypes.FP64
     # The JIT is unforgiving and does not coerce--use the correct types!
-    with pytest.raises(KeyError, match="absdiff does not work with INT64"):
+    with pytest.raises(KeyError, match="absdiff does not work with .INT64, INT64. types"):
         v << absdiff(v & v)
     w = (v - 1).new("FP64")
     v = v.dup("FP64")
@@ -198,6 +214,36 @@ def test_jit_binary(v):
     res = absdiff(w & v).new()
     assert expected.isequal(res)
     assert absdiff["FP64"].jit_c_definition == cdef
+    assert "FP32" not in absdiff
+    with burble():
+        absdiff_fp32 = binary.ss.register_new(
+            "absdiff",
+            cdef.replace("FP64", "FP32").replace("fabs", "fabsf"),
+            "FP32",
+            "FP32",
+            "FP32",
+        )
+    assert absdiff_fp32 is absdiff
+    assert "FP32" in absdiff
+    with pytest.raises(
+        TypeError,
+        match="BinaryOp gb.binary.ss.absdiff already defined for .FP64, FP64. input types",
+    ):
+        binary.ss.register_new("absdiff", cdef, "FP64", "FP64", "FP64")
+    binary.ss.register_new("nested.absdiff", cdef, "FP64", "FP64", "FP64")
+    with pytest.raises(AttributeError, match="nested is already defined"):
+        binary.ss.register_new("nested", cdef, "FP64", "FP64", "FP64")
+    # Make sure we can be specific with left/right dtypes
+    absdiff_mixed = binary.ss.register_new(
+        "absdiff",
+        "void absdiff (double *z, double *x, float *y) { (*z) = fabs ((*x) - (double)(*y)) ; }",
+        "FP64",
+        "FP32",
+        "FP64",
+    )
+    assert absdiff_mixed is absdiff
+    assert ("FP64", "FP32") in absdiff
+    assert ("FP32", "FP64") not in absdiff
 
 
 def test_jit_indexunary(v):
@@ -218,15 +264,50 @@ def test_jit_indexunary(v):
     assert not hasattr(select, "diffy")
     assert not hasattr(select.ss, "diffy")
     assert diffy.name == "ss.diffy"
-    assert diffy.types == {dtypes.FP64: dtypes.FP64}
+    assert diffy.types == {(dtypes.FP64, dtypes.FP64): dtypes.FP64}
+    assert "FP64" in diffy
+    assert diffy["FP64"].return_type == dtypes.FP64
     # The JIT is unforgiving and does not coerce--use the correct types!
-    with pytest.raises(KeyError, match="diffy does not work with INT64"):
+    with pytest.raises(KeyError, match="diffy does not work with .INT64, INT64. types"):
         v << diffy(v, 1)
     v = v.dup("FP64")
-    res = diffy(v, -1).new()
+    with pytest.raises(KeyError, match="diffy does not work with .FP64, INT64. types"):
+        v << diffy(v, -1)
+    res = diffy(v, -1.0).new()
     expected = Vector.from_coo([1, 3, 4, 6], [2, 6, 12, 6], dtype="FP64")
     assert expected.isequal(res)
     assert diffy["FP64"].jit_c_definition == cdef
+    assert "FP32" not in diffy
+    with burble():
+        diffy_fp32 = indexunary.ss.register_new(
+            "diffy",
+            cdef.replace("double", "float").replace("fabs", "fabsf"),
+            "FP32",
+            "FP32",
+            "FP32",
+        )
+    assert diffy_fp32 is diffy
+    assert "FP32" in diffy
+    with pytest.raises(
+        TypeError,
+        match="IndexUnaryOp gb.indexunary.ss.diffy already defined for .FP64, FP64. input types",
+    ):
+        indexunary.ss.register_new("diffy", cdef, "FP64", "FP64", "FP64")
+    indexunary.ss.register_new("nested.diffy", cdef, "FP64", "FP64", "FP64")
+    with pytest.raises(AttributeError, match="nested is already defined"):
+        indexunary.ss.register_new("nested", cdef, "FP64", "FP64", "FP64")
+    # Make sure we can be specific with left/right dtypes
+    diffy_mixed = indexunary.ss.register_new(
+        "diffy",
+        "void diffy (double *z, double *x, GrB_Index i, GrB_Index j, float *y) "
+        "{ (*z) = (i + j) * fabs ((*x) - (double)(*y)) ; }",
+        "FP64",
+        "FP32",
+        "FP64",
+    )
+    assert diffy_mixed is diffy
+    assert ("FP64", "FP32") in diffy
+    assert ("FP32", "FP64") not in diffy
 
 
 def test_jit_select(v):
@@ -248,20 +329,57 @@ def test_jit_select(v):
     assert not hasattr(indexunary, "woot")
     assert hasattr(indexunary.ss, "woot")
     assert woot.name == "ss.woot"
-    assert woot.types == {dtypes.INT32: dtypes.BOOL}
+    assert woot.types == {(dtypes.INT32, dtypes.INT32): dtypes.BOOL}
+    assert "INT32" in woot
+    assert woot["INT32"].return_type == dtypes.BOOL
     # The JIT is unforgiving and does not coerce--use the correct types!
-    with pytest.raises(KeyError, match="woot does not work with INT64"):
+    with pytest.raises(KeyError, match="woot does not work with .INT64, INT64. types"):
         v << woot(v, 1)
     v = v.dup("INT32")
-    res = woot(v, 6).new()
+    with pytest.raises(KeyError, match="woot does not work with .INT32, INT64. types"):
+        v << woot(v, 6)
+    res = woot(v, gb.Scalar.from_value(6, "INT32")).new()
     expected = Vector.from_coo([4, 6], [2, 0])
     assert expected.isequal(res)
 
-    res = indexunary.ss.woot(v, 6).new()
+    res = indexunary.ss.woot(v, gb.Scalar.from_value(6, "INT32")).new()
     expected = Vector.from_coo([1, 3, 4, 6], [False, False, True, True])
     assert expected.isequal(res)
     assert woot["INT32"].jit_c_definition == cdef
 
+    assert "INT64" not in woot
+    with burble():
+        woot_int64 = select.ss.register_new(
+            "woot", cdef.replace("int32", "int64"), "INT64", "INT64"
+        )
+    assert woot_int64 is woot
+    assert "INT64" in woot
+    with pytest.raises(TypeError, match="ss.woot already defined for .INT32, INT32. input types"):
+        select.ss.register_new("woot", cdef, "INT32", "INT32")
+    del indexunary.ss.woot
+    with pytest.raises(TypeError, match="ss.woot already defined for .INT32, INT32. input types"):
+        select.ss.register_new("woot", cdef, "INT32", "INT32")
+    select.ss.register_new("nested.woot", cdef, "INT32", "INT32")
+    with pytest.raises(AttributeError, match="nested is already defined"):
+        select.ss.register_new("nested", cdef, "INT32", "INT32")
+    del indexunary.ss.nested
+    with pytest.raises(AttributeError, match="nested is already defined"):
+        select.ss.register_new("nested", cdef.replace("woot", "nested"), "INT32", "INT32")
+    select.ss.haha = "haha"
+    with pytest.raises(AttributeError, match="haha is already defined"):
+        select.ss.register_new("haha", cdef.replace("woot", "haha"), "INT32", "INT32")
+    # Make sure we can be specific with left/right dtypes
+    woot_mixed = select.ss.register_new(
+        "woot",
+        "void woot (bool *z, const int64_t *x, GrB_Index i, GrB_Index j, int32_t *y) "
+        "{ (*z) = ((*x) + i + j == (*y)) ; }",
+        "INT64",
+        "INT32",
+    )
+    assert woot_mixed is woot
+    assert ("INT64", "INT32") in woot
+    assert ("INT32", "INT64") not in woot
+
 
 def test_context_importable():
     if _IS_SSGB7:

From 75a67bcbbc52ef4c7addd85a41d022fd1d748a27 Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Sun, 15 Oct 2023 13:30:18 -0500
Subject: [PATCH 2/7] dunno if mamba doesn't do this strictly or not, but
 strict is needed

---
 .github/workflows/test_and_build.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml
index 1a7dff313..ac965988d 100644
--- a/.github/workflows/test_and_build.yml
+++ b/.github/workflows/test_and_build.yml
@@ -144,8 +144,7 @@ jobs:
           use-mamba: true
           python-version: ${{ steps.pyver.outputs.selected }}
           channels: conda-forge,${{ contains(steps.pyver.outputs.selected, 'pypy') && 'defaults' || 'nodefaults' }}
-          # mamba does not yet implement strict priority
-          # channel-priority: ${{ contains(steps.pyver.outputs.selected, 'pypy') && 'flexible' || 'strict' }}
+          channel-priority: ${{ contains(steps.pyver.outputs.selected, 'pypy') && 'flexible' || 'strict' }}
           activate-environment: graphblas
           auto-activate-base: false
       - name: Setup conda

From ab066c2256200d36d49c540644dc5c9f63bbd27f Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Sun, 15 Oct 2023 14:51:46 -0500
Subject: [PATCH 3/7] Fix when numba is not installed

---
 graphblas/core/operator/base.py   |  2 --
 graphblas/core/operator/binary.py |  7 ++++---
 graphblas/tests/test_op.py        | 10 +++++++++-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/graphblas/core/operator/base.py b/graphblas/core/operator/base.py
index 0e79385fc..d66aa2f4a 100644
--- a/graphblas/core/operator/base.py
+++ b/graphblas/core/operator/base.py
@@ -336,8 +336,6 @@ def __getitem__(self, type_):
                     raise KeyError(f"{self.name} does not work with {type_}")
             else:
                 return self._typed_ops[type_]
-        if not _supports_udfs:
-            raise KeyError(f"{self.name} does not work with {type_}")
         # This is a UDT or is able to operate on UDTs such as `first` any `any`
         dtype = lookup_dtype(type_)
         return self._compile_udt(dtype, dtype)
diff --git a/graphblas/core/operator/binary.py b/graphblas/core/operator/binary.py
index 0b229d1a0..676ed0970 100644
--- a/graphblas/core/operator/binary.py
+++ b/graphblas/core/operator/binary.py
@@ -523,8 +523,8 @@ def _compile_udt(self, dtype, dtype2):
         if dtypes in self._udt_types:
             return self._udt_ops[dtypes]
 
-        nt = numba.types
-        if self.name == "eq" and not self._anonymous:
+        if self.name == "eq" and not self._anonymous and _has_numba:
+            nt = numba.types
             # assert dtype.np_type == dtype2.np_type
             itemsize = dtype.np_type.itemsize
             mask = _udt_mask(dtype.np_type)
@@ -561,7 +561,8 @@ def binary_wrapper(z_ptr, x_ptr, y_ptr):  # pragma: no cover (numba)
                     #     z_ptr[0] = True
                     z_ptr[0] = (x[mask] == y[mask]).all()
 
-        elif self.name == "ne" and not self._anonymous:
+        elif self.name == "ne" and not self._anonymous and _has_numba:
+            nt = numba.types
             # assert dtype.np_type == dtype2.np_type
             itemsize = dtype.np_type.itemsize
             mask = _udt_mask(dtype.np_type)
diff --git a/graphblas/tests/test_op.py b/graphblas/tests/test_op.py
index 39ae7e9c0..c7d1ce97c 100644
--- a/graphblas/tests/test_op.py
+++ b/graphblas/tests/test_op.py
@@ -19,7 +19,15 @@
 )
 from graphblas.core import _supports_udfs as supports_udfs
 from graphblas.core import lib, operator
-from graphblas.core.operator import BinaryOp, IndexUnaryOp, Monoid, Semiring, UnaryOp, get_semiring, SelectOp
+from graphblas.core.operator import (
+    BinaryOp,
+    IndexUnaryOp,
+    Monoid,
+    SelectOp,
+    Semiring,
+    UnaryOp,
+    get_semiring,
+)
 from graphblas.dtypes import (
     BOOL,
     FP32,

From b5d14de31e69c173b517deeb61fe1a228bed7390 Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Mon, 16 Oct 2023 10:06:42 -0500
Subject: [PATCH 4/7] black is back

---
 .github/workflows/test_and_build.yml | 3 ---
 .pre-commit-config.yaml              | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml
index ac965988d..d4504e2fd 100644
--- a/.github/workflows/test_and_build.yml
+++ b/.github/workflows/test_and_build.yml
@@ -411,9 +411,6 @@ jobs:
         if: matrix.slowtask == 'pytest_bizarro'
         run: |
           # This step uses `black`
-          if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.12') }} == true ]]; then
-            pip install black  # Latest version of black on conda-forge does not have builds for Python 3.12
-          fi
           coverage run -a -m graphblas.core.automethods
           coverage run -a -m graphblas.core.infixmethods
           git diff --exit-code
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index afdc6c75b..96c8b9aeb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -98,7 +98,7 @@ repos:
     hooks:
       - id: ruff
   - repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.7.0
+    rev: v0.8.0
     hooks:
       - id: sphinx-lint
         args: [--enable, all, "--disable=line-too-long,leaked-markup"]

From feca9330331f7ae021fc32f0319dd45dd45c55bf Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Mon, 16 Oct 2023 10:28:32 -0500
Subject: [PATCH 5/7] codecov annotations in github are super annoying

---
 .codecov.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.codecov.yml b/.codecov.yml
index 5371c00ad..ba857074f 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -7,6 +7,9 @@ coverage:
       default:
         informational: true
     changes: false
-comment: on
+comment:
+  behavior: default
+github_checks:
+  annotations: false
 ignore:
   - graphblas/viz.py

From 76f28ee79991ae0f700949c707a81903fd62ffd1 Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Mon, 16 Oct 2023 21:35:55 -0500
Subject: [PATCH 6/7] Try different layout for codecov PR comment (what is
 header and footer?)

---
 .codecov.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.codecov.yml b/.codecov.yml
index ba857074f..24821eff5 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -8,6 +8,7 @@ coverage:
         informational: true
     changes: false
 comment:
+  layout: "header, diff, footer"
   behavior: default
 github_checks:
   annotations: false

From f36002e59e33efc047ec4dd4770f7ad5c0250755 Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Mon, 16 Oct 2023 21:42:37 -0500
Subject: [PATCH 7/7] Don't need footer in codecov comment

---
 .codecov.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.codecov.yml b/.codecov.yml
index 24821eff5..1894009c1 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -8,7 +8,7 @@ coverage:
         informational: true
     changes: false
 comment:
-  layout: "header, diff, footer"
+  layout: "header, diff"
   behavior: default
 github_checks:
   annotations: false