From 8145a7ce3fc3c726351dcda57ec3974d6d98658a Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 17 Apr 2020 19:08:48 -0500 Subject: [PATCH 1/9] Add `register_anonymous` to BinaryOp and use this for `isclose`. Oops, I still need to do this for `UnaryOp`. We cache the compilation of `isclose` based on `(abs_tol, rel_tol)`. Overall, I don't know the performance difference of this `isclose`, but just as importantly this should be more memory efficient. --- grblas/matrix.py | 27 +++++++-------------------- grblas/ops.py | 15 ++++++++++++--- grblas/vector.py | 38 +++++++++++++++++--------------------- 3 files changed, 36 insertions(+), 44 deletions(-) diff --git a/grblas/matrix.py b/grblas/matrix.py index 121955efb..50ad3c20a 100644 --- a/grblas/matrix.py +++ b/grblas/matrix.py @@ -1,9 +1,9 @@ from functools import partial from .base import lib, ffi, GbContainer, GbDelayed -from .vector import Vector +from .vector import Vector, _generate_isclose from .scalar import Scalar from .ops import BinaryOp, find_opclass, find_return_type, reify_op -from . import dtypes, unary, binary, monoid, semiring +from . 
import dtypes, binary, monoid, semiring from .exceptions import check_status, is_error, NoValue @@ -57,7 +57,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): """ Check for approximate equality (including same size and empty values) If `check_dtype` is True, also checks that dtypes match - Closeness check is equivalent to `abs(a-b) <= max(rtol * max(abs(a), abs(b)), atol)` + Closeness check is equivalent to `abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)` """ if type(other) is not self.__class__: return False @@ -69,28 +69,15 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): return False if self.nvals != other.nvals: return False - if check_dtype: - common_dtype = self.dtype - else: - common_dtype = dtypes.unify(self.dtype, other.dtype) - matches = Matrix.new_from_type(bool, self.nrows, self.ncols) - tmp1 = self.apply(unary.abs).new(dtype=common_dtype) - tmp2 = other.apply(unary.abs).new(dtype=common_dtype) - tmp1 << tmp1.ewise_mult(tmp2, monoid.max) + isclose = _generate_isclose(rel_tol, abs_tol) + matches = self.ewise_mult(other, isclose).new(dtype=bool) # ewise_mult performs intersection, so nvals will indicate mismatched empty values - if tmp1.nvals != self.nvals: + if matches.nvals != self.nvals: return False - tmp1[:, :](mask=tmp1.S, accum=binary.times) << rel_tol - tmp1[:, :](mask=tmp1.S, accum=binary.max) << abs_tol - tmp2 << self.ewise_mult(other, binary.minus) - tmp2 << tmp2.apply(unary.abs) - matches << tmp2.ewise_mult(tmp1, binary.le[common_dtype]) # Check if all results are True - result = Scalar.new_from_type(bool) - result << matches.reduce_scalar(monoid.land) - return result.value + return matches.reduce_scalar(monoid.land).value def __len__(self): return self.nvals diff --git a/grblas/ops.py b/grblas/ops.py index 7bfb61602..02c122d1e 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -191,10 +191,9 @@ class BinaryOp(OpBase): all_known_instances = set() @classmethod - def 
register_new(cls, name, func): + def _build_udf(cls, func, name=None): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') - module, funcname = cls._remove_nesting(name) success = False new_type_obj = cls(name) for type_, sample_val in dtypes._sample_values.items(): @@ -232,10 +231,20 @@ def binary_wrapper(z, x, y): except Exception: continue if success: - setattr(module, funcname, new_type_obj) + return new_type_obj else: raise UdfParseError('Unable to parse function using Numba') + @classmethod + def register_anonymous(cls, func): + return cls._build_udf(func) + + @classmethod + def register_new(cls, name, func): + module, funcname = cls._remove_nesting(name) + udf = cls._build_udf(func) + setattr(module, funcname, udf) + @classmethod def _initialize(cls): super()._initialize() diff --git a/grblas/vector.py b/grblas/vector.py index 72a54dbac..6316f8a4d 100644 --- a/grblas/vector.py +++ b/grblas/vector.py @@ -1,11 +1,21 @@ -from functools import partial +from functools import lru_cache, partial from .base import lib, ffi, GbContainer, GbDelayed from .scalar import Scalar from .ops import BinaryOp, find_opclass, find_return_type, reify_op -from . import dtypes, unary, binary, monoid, semiring +from . 
import dtypes, binary, monoid, semiring from .exceptions import check_status, is_error, NoValue +@lru_cache(maxsize=1024) +def _generate_isclose(rel_tol, abs_tol): + # numba will inline the current values of `rel_tol` and `abs_tol` below + def isclose(x, y): + # Return 1 or 0 instead of bool because of this numba issue + # https://github.com/numba/numba/issues/5395 + return 1 if abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) else 0 + return BinaryOp.register_anonymous(isclose) + + class Vector(GbContainer): """ GraphBLAS Sparse Vector @@ -55,7 +65,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): """ Check for approximate equality (including same size and empty values) If `check_dtype` is True, also checks that dtypes match - Closeness check is equivalent to `abs(a-b) <= max(rtol * max(abs(a), abs(b)), atol)` + Closeness check is equivalent to `abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)` """ if type(other) is not self.__class__: return False @@ -65,29 +75,15 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): return False if self.nvals != other.nvals: return False - if check_dtype: - # dtypes are equivalent, so not need to unify - common_dtype = self.dtype - else: - common_dtype = dtypes.unify(self.dtype, other.dtype) - matches = Vector.new_from_type(bool, self.size) - tmp1 = self.apply(unary.abs).new(dtype=common_dtype) - tmp2 = other.apply(unary.abs).new(dtype=common_dtype) - tmp1 << tmp1.ewise_mult(tmp2, monoid.max) + isclose = _generate_isclose(rel_tol, abs_tol) + matches = self.ewise_mult(other, isclose).new(dtype=bool) # ewise_mult performs intersection, so nvals will indicate mismatched empty values - if tmp1.nvals != self.nvals: + if matches.nvals != self.nvals: return False - tmp1[:](mask=tmp1.S, accum=binary.times) << rel_tol - tmp1[:](mask=tmp1.S, accum=binary.max) << abs_tol - tmp2 << self.ewise_mult(other, binary.minus) - tmp2 << tmp2.apply(unary.abs) - matches << 
tmp2.ewise_mult(tmp1, binary.le[common_dtype]) # Check if all results are True - result = Scalar.new_from_type(bool) - result << matches.reduce(monoid.land) - return result.value + return matches.reduce(monoid.land).value def __len__(self): return self.nvals From e392eb4377406af07f1ef03af01c68d9fc940d58 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 17 Apr 2020 23:16:24 -0500 Subject: [PATCH 2/9] Allow creation of anonymous UnaryOp, Monoid, and Semiring as well. Still need to add a few tests and docstrings. I'm not sure I like `register_anonymous`. create_anonymous, build_anonymous, create, build, compile, jit? --- grblas/ops.py | 51 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/grblas/ops.py b/grblas/ops.py index 02c122d1e..b404572ad 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -127,10 +127,9 @@ class UnaryOp(OpBase): all_known_instances = set() @classmethod - def register_new(cls, name, func): + def _build(cls, func, name=None): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') - module, funcname = cls._remove_nesting(name) success = False new_type_obj = cls(name) for type_, sample_val in dtypes._sample_values.items(): @@ -167,10 +166,20 @@ def unary_wrapper(z, x): except Exception: continue if success: - setattr(module, funcname, new_type_obj) + return new_type_obj else: raise UdfParseError('Unable to parse function using Numba') + @classmethod + def register_anonymous(cls, func): + return cls._build(func) + + @classmethod + def register_new(cls, name, func): + module, funcname = cls._remove_nesting(name) + unary_op = cls._build(func) + setattr(module, funcname, unary_op) + class BinaryOp(OpBase): _module = binary @@ -191,7 +200,7 @@ class BinaryOp(OpBase): all_known_instances = set() @classmethod - def _build_udf(cls, func, name=None): + def _build(cls, func, name=None): if type(func) is not FunctionType: raise TypeError(f'udf must be a 
function, not {type(func)}') success = False @@ -237,13 +246,13 @@ def binary_wrapper(z, x, y): @classmethod def register_anonymous(cls, func): - return cls._build_udf(func) + return cls._build(func) @classmethod def register_new(cls, name, func): module, funcname = cls._remove_nesting(name) - udf = cls._build_udf(func) - setattr(module, funcname, udf) + binary_op = cls._build(func) + setattr(module, funcname, binary_op) @classmethod def _initialize(cls): @@ -280,7 +289,7 @@ class Monoid(OpBase): all_known_instances = set() @classmethod - def register_new(cls, name, binaryop, zero): + def _build(cls, binaryop, zero, name=None): if type(binaryop) is not BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') module, funcname = cls._remove_nesting(name) @@ -295,7 +304,17 @@ def register_new(cls, name, binaryop, zero): ret_type = find_return_type(binaryop[type_]) _return_type[new_monoid[0]] = ret_type cls.all_known_instances.add(new_monoid[0]) - setattr(module, funcname, new_type_obj) + return new_type_obj + + @classmethod + def register_anonymous(cls, binaryop, zero): + return cls._build(binaryop, zero) + + @classmethod + def register_new(cls, name, binaryop, zero): + module, funcname = cls._remove_nesting(name) + monoid = cls._build(binaryop, zero, name) + setattr(module, funcname, monoid) class Semiring(OpBase): @@ -315,7 +334,7 @@ class Semiring(OpBase): all_known_instances = set() @classmethod - def register_new(cls, name, monoid, binaryop): + def _build(cls, monoid, binaryop, name=None): if type(monoid) is not Monoid: raise TypeError(f'monoid must be a Monoid, not {type(monoid)}') if type(binaryop) != BinaryOp: @@ -330,7 +349,17 @@ def register_new(cls, name, monoid, binaryop): ret_type = find_return_type(monoid[type_]) _return_type[new_semiring[0]] = ret_type cls.all_known_instances.add(new_semiring[0]) - setattr(module, funcname, new_type_obj) + return new_type_obj + + @classmethod + def register_anonymous(cls, monoid, binaryop): + 
return cls._build(monoid, binaryop) + + @classmethod + def register_new(cls, name, monoid, binaryop): + module, funcname = cls._remove_nesting(name) + semiring = cls._build(monoid, binaryop, name) + setattr(module, funcname, semiring) def find_opclass(gb_op): From b4415c46760fa543c8f9a759e5ac25600c38199f Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 19 Apr 2020 22:35:56 -0500 Subject: [PATCH 3/9] Create UDFs of numpy functions supported by numba. So far, do unary, binary, and monoid. Semirings may come later. This required modifying how UDFs are registered. In particular, we are now more relaxed about bool inputs that have non-bool outputs. Numba has difficulty compiling functions for bools, so we upcast bool values to int8 during the computation. This may introduce subtle bugs if coercions are not appropriate. For example, bit-twiddling and shifting of bools may be incorrect! --- grblas/__init__.py | 5 +- grblas/dtypes.py | 54 +++++++----- grblas/numpyops/__init__.py | 8 ++ grblas/numpyops/binary.py | 64 ++++++++++++++ grblas/numpyops/monoid.py | 98 ++++++++++++++++++++++ grblas/numpyops/unary.py | 72 ++++++++++++++++ grblas/ops.py | 161 +++++++++++++++++++++++++----------- grblas/vector.py | 6 +- recipe/meta.yaml | 14 ++-- test/test_op.py | 6 +- 10 files changed, 403 insertions(+), 85 deletions(-) create mode 100644 grblas/numpyops/__init__.py create mode 100644 grblas/numpyops/binary.py create mode 100644 grblas/numpyops/monoid.py create mode 100644 grblas/numpyops/unary.py diff --git a/grblas/__init__.py b/grblas/__init__.py index 29e337d60..7b36aa2d4 100644 --- a/grblas/__init__.py +++ b/grblas/__init__.py @@ -4,7 +4,7 @@ _init_params = None _SPECIAL_ATTRS = ["lib", "ffi", "Matrix", "Vector", "Scalar", "base", "exceptions", "matrix", "ops", "scalar", "vector" - "unary", "binary", "monoid", "semiring"] + "unary", "binary", "monoid", "semiring", "numpyops"] def __getattr__(name): @@ -31,7 +31,7 @@ def init(backend="suitesparse", blocking=True): def 
_init(backend, blocking, automatic=False): global _init_params, lib, ffi, Matrix, Vector, Scalar global base, exceptions, matrix, ops, scalar, vector - global unary, binary, monoid, semiring + global unary, binary, monoid, semiring, numpyops passed_params = dict(backend=backend, blocking=blocking, automatic=automatic) if _init_params is None: @@ -68,6 +68,7 @@ def _init(backend, blocking, automatic=False): matrix = importlib.import_module(f".matrix", __name__) vector = importlib.import_module(f".vector", __name__) scalar = importlib.import_module(f".scalar", __name__) + numpyops = importlib.import_module(f".numpyops", __name__) from .matrix import Matrix from .vector import Vector from .scalar import Scalar diff --git a/grblas/dtypes.py b/grblas/dtypes.py index c579b3e73..602f48bf9 100644 --- a/grblas/dtypes.py +++ b/grblas/dtypes.py @@ -1,16 +1,18 @@ import re -from . import lib +import numpy as np import numba +from . import lib class DataType: - __slots__ = ['name', 'gb_type', 'c_type', 'numba_type'] + __slots__ = ['name', 'gb_type', 'c_type', 'numba_type', 'numpy_type'] def __init__(self, name, gb_type, c_type, numba_type): self.name = name self.gb_type = gb_type self.c_type = c_type self.numba_type = numba_type + self.numpy_type = getattr(np, numba_type.name) def __repr__(self): return self.name @@ -54,28 +56,36 @@ def from_pytype(cls, pytype): # Used for testing user-defined functions _sample_values = { - BOOL: True, - INT8: -3, - UINT8: 3, - INT16: -3, - UINT16: 3, - INT32: -3, - UINT32: 3, - INT64: -3, - UINT64: 3, - FP32: 3.14, - FP64: 3.14 + INT8: np.int8(1), + UINT8: np.uint8(1), + INT16: np.int16(1), + UINT16: np.uint16(1), + INT32: np.int32(1), + UINT32: np.uint32(1), + INT64: np.int64(1), + UINT64: np.uint64(1), + FP32: np.float32(0.5), + FP64: np.float64(0.5), + BOOL: np.bool_(True), } # Create register to easily lookup types by name, gb_type, or c_type _registry = {} -for x in _sample_values: - _registry[x.name] = x - _registry[x.gb_type] = x - 
_registry[x.c_type] = x - _registry[x.numba_type] = x - _registry[x.numba_type.name] = x -del x +for dtype, val in _sample_values.items(): + _registry[dtype.name] = dtype + _registry[dtype.gb_type] = dtype + _registry[dtype.c_type] = dtype + _registry[dtype.numba_type] = dtype + _registry[dtype.numba_type.name] = dtype + _registry[dtype.numpy_type] = dtype + _registry[val.dtype] = dtype + _registry[val.dtype.name] = dtype +del dtype +# Upcast numpy float16 to float32 +_registry[np.float16] = FP32 +_registry[np.dtype(np.float16)] = FP32 +_registry['float16'] = FP32 + # Add some common Python types as lookup keys _registry[int] = DataType.from_pytype(int) _registry[float] = DataType.from_pytype(float) @@ -92,6 +102,10 @@ def lookup(key): if hasattr(key, 'name'): return _registry[key.name] else: + try: + return lookup(np.dtype(key)) + except Exception: + pass raise diff --git a/grblas/numpyops/__init__.py b/grblas/numpyops/__init__.py new file mode 100644 index 000000000..f42c58479 --- /dev/null +++ b/grblas/numpyops/__init__.py @@ -0,0 +1,8 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numba here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" +from . import unary, binary, monoid # noqa diff --git a/grblas/numpyops/binary.py b/grblas/numpyops/binary.py new file mode 100644 index 000000000..5d8b665ad --- /dev/null +++ b/grblas/numpyops/binary.py @@ -0,0 +1,64 @@ +import numpy as np +from .. 
import ops + +_binary_names = { + # Math operations + 'add', + 'subtract', + 'multiply', + 'divide', + 'logaddexp', + 'logaddexp2', + 'true_divide', + 'floor_divide', + 'power', + 'remainder', + 'mod', + 'fmod', + 'gcd', + 'lcm', + + # Trigonometric functions + 'arctan2', + 'hypot', + + # Bit-twiddling functions + 'bitwise_and', + 'bitwise_or', + 'bitwise_xor', + 'left_shift', + 'right_shift', + + # Comparison functions + 'greater', + 'greater_equal', + 'less', + 'less_equal', + 'not_equal', + 'equal', + 'logical_and', + 'logical_or', + 'logical_xor', + 'maximum', + 'minimum', + 'fmax', + 'fmin', + + # Floating functions + 'copysign', + 'nextafter', + 'ldexp', +} + + +def __dir__(): + return list(_binary_names) + + +def __getattr__(name): + if name not in _binary_names: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + numpy_func = getattr(np, name) + func = ops.BinaryOp.register_anonymous(lambda x, y: numpy_func(x, y), name) + globals()[name] = func + return func diff --git a/grblas/numpyops/monoid.py b/grblas/numpyops/monoid.py new file mode 100644 index 000000000..a7d04edd4 --- /dev/null +++ b/grblas/numpyops/monoid.py @@ -0,0 +1,98 @@ +import numpy as np +from .. import dtypes, ops +from . 
import binary + +_float_dtypes = {dtypes.FP32, dtypes.FP64} +_int_dtypes = { + dtypes.INT8, dtypes.UINT8, dtypes.INT16, dtypes.UINT16, + dtypes.INT32, dtypes.UINT32, dtypes.INT64, dtypes.UINT64, +} +_bool_int_dtypes = _int_dtypes | {dtypes.BOOL} + +_monoid_identities = { + # Math operations + 'add': 0, + 'multiply': 1, + 'logaddexp': dict.fromkeys(_float_dtypes, -np.inf), + 'logaddexp2': dict.fromkeys(_float_dtypes, -np.inf), + 'gcd': dict.fromkeys(_int_dtypes, 0), + + # Trigonometric functions + 'hypot': dict.fromkeys(_float_dtypes, 0.), + + # Bit-twiddling functions + 'bitwise_and': {dtype: True if dtype is dtypes.BOOL else -1 for dtype in _bool_int_dtypes}, + 'bitwise_or': dict.fromkeys(_bool_int_dtypes, 0), + 'bitwise_xor': dict.fromkeys(_bool_int_dtypes, 0), + + # Comparison functions + 'equal': {dtypes.BOOL: True}, + 'logical_and': {dtypes.BOOL: True}, + 'logical_or': {dtypes.BOOL: True}, + 'logical_xor': {dtypes.BOOL: False}, + 'maximum': { + dtypes.BOOL: False, + dtypes.INT8: np.iinfo(np.int8).min, + dtypes.UINT8: 0, + dtypes.INT16: np.iinfo(np.int16).min, + dtypes.UINT16: 0, + dtypes.INT32: np.iinfo(np.int32).min, + dtypes.UINT32: 0, + dtypes.INT64: np.iinfo(np.int64).min, + dtypes.UINT64: 0, + dtypes.FP32: -np.inf, + dtypes.FP64: -np.inf, + }, + 'minimum': { + dtypes.BOOL: True, + dtypes.INT8: np.iinfo(np.int8).max, + dtypes.UINT8: np.iinfo(np.uint8).max, + dtypes.INT16: np.iinfo(np.int16).max, + dtypes.UINT16: np.iinfo(np.uint16).max, + dtypes.INT32: np.iinfo(np.int32).max, + dtypes.UINT32: np.iinfo(np.uint32).max, + dtypes.INT64: np.iinfo(np.int64).max, + dtypes.UINT64: np.iinfo(np.uint64).max, + dtypes.FP32: np.inf, + dtypes.FP64: np.inf, + }, + 'fmax': { + dtypes.BOOL: False, + dtypes.INT8: np.iinfo(np.int8).min, + dtypes.UINT8: 0, + dtypes.INT16: np.iinfo(np.int8).min, + dtypes.UINT16: 0, + dtypes.INT32: np.iinfo(np.int8).min, + dtypes.UINT32: 0, + dtypes.INT64: np.iinfo(np.int8).min, + dtypes.UINT64: 0, + dtypes.FP32: -np.inf, # or np.nan? 
+ dtypes.FP64: -np.inf, # or np.nan? + }, + 'fmin': { + dtypes.BOOL: True, + dtypes.INT8: np.iinfo(np.int8).max, + dtypes.UINT8: np.iinfo(np.uint8).max, + dtypes.INT16: np.iinfo(np.int16).max, + dtypes.UINT16: np.iinfo(np.uint16).max, + dtypes.INT32: np.iinfo(np.int32).max, + dtypes.UINT32: np.iinfo(np.uint32).max, + dtypes.INT64: np.iinfo(np.int64).max, + dtypes.UINT64: np.iinfo(np.uint64).max, + dtypes.FP32: np.inf, # or np.nan? + dtypes.FP64: np.inf, # or np.nan? + }, +} + + +def __dir__(): + return list(_monoid_identities) + + +def __getattr__(name): + if name not in _monoid_identities: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + func = getattr(binary, name) + monoid = ops.Monoid.register_anonymous(func, _monoid_identities[name], name) + globals()[name] = monoid + return monoid diff --git a/grblas/numpyops/unary.py b/grblas/numpyops/unary.py new file mode 100644 index 000000000..7bd6ebde1 --- /dev/null +++ b/grblas/numpyops/unary.py @@ -0,0 +1,72 @@ +import numpy as np +from .. 
import ops + +_unary_names = { + # Math operations + 'negative', + 'abs', + 'absolute', + 'fabs', + 'rint', + 'sign', + 'conj', + 'exp', + 'exp2', + 'log', + 'log2', + 'log10', + 'expm1', + 'log1p', + 'sqrt', + 'square', + 'reciprocal', + 'conjugate', + + # Trigonometric functions + 'sin', + 'cos', + 'tan', + 'arcsin', + 'arccos', + 'arctan', + 'sinh', + 'cosh', + 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'deg2rad', + 'rad2deg', + 'degrees', + 'radians', + + # Bit-twiddling functions + 'bitwise_not', + 'invert', + + # Comparison functions + 'logical_not', + + # Floating functions + 'isfinite', + 'isinf', + 'isnan', + 'signbit', + 'floor', + 'ceil', + 'trunc', + 'spacing', +} + + +def __dir__(): + return list(_unary_names) + + +def __getattr__(name): + if name not in _unary_names: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + numpy_func = getattr(np, name) + func = ops.UnaryOp.register_anonymous(lambda x: numpy_func(x)) + globals()[name] = func + return func diff --git a/grblas/ops.py b/grblas/ops.py index b404572ad..91b3a877f 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -1,5 +1,7 @@ import re +import numpy as np import numba +from collections.abc import Mapping from types import FunctionType from . 
import lib, ffi, dtypes, unary, binary, monoid, semiring from .exceptions import GrblasException @@ -127,29 +129,41 @@ class UnaryOp(OpBase): all_known_instances = set() @classmethod - def _build(cls, func, name=None): + def _build(cls, name, func): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') success = False new_type_obj = cls(name) + return_types = {} + nt = numba.types for type_, sample_val in dtypes._sample_values.items(): # Check if func can handle this data type try: - ret = func(sample_val) - if type(ret) is bool: - ret_type = dtypes.BOOL - elif type_ == 'BOOL': - # type_ == bool, but return type != bool; invalid - continue - else: + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + ret = func(sample_val) + ret_type = dtypes.lookup(type(ret)) + if ret_type != type_ and ( + 'INT' in ret_type.name and 'INT' in type_.name + or 'FP' in ret_type.name and 'FP' in type_.name + or type_ == 'UINT64' and ret_type == 'FP64' and return_types.get('INT64') == 'INT64' + ): + # Downcast `ret_type` to `type_`. This is probably what users want most of the time, + # but we can't make a perfect rule. There should be a way for users to be explicit. 
ret_type = type_ + elif type_ == 'BOOL' and ret_type == 'INT64' and return_types.get('INT8') == 'INT8': + ret_type = dtypes.INT8 + + # Numba has a bug and is unable to handle BOOL correctly right now + # See: https://github.com/numba/numba/issues/5395 + # We're relying on coercion behaving correctly here + input_type = dtypes.INT8 if type_ == 'BOOL' else type_ + return_type = dtypes.INT8 if ret_type == 'BOOL' else ret_type - nt = numba.types # JIT the func so it can be used from a cfunc unary_udf = numba.njit(func) # Build wrapper because GraphBLAS wants pointers and void return - wrapper_sig = nt.void(nt.CPointer(ret_type.numba_type), - nt.CPointer(type_.numba_type)) + wrapper_sig = nt.void(nt.CPointer(return_type.numba_type), + nt.CPointer(input_type.numba_type)) @numba.cfunc(wrapper_sig, nopython=True) def unary_wrapper(z, x): @@ -158,11 +172,12 @@ def unary_wrapper(z, x): new_unary = ffi.new('GrB_UnaryOp*') lib.GrB_UnaryOp_new(new_unary, unary_wrapper.cffi, - ret_type.gb_type, type_.gb_type) + return_type.gb_type, input_type.gb_type) new_type_obj[type_.name] = new_unary[0] _return_type[new_unary[0]] = ret_type.name cls.all_known_instances.add(new_unary[0]) success = True + return_types[type_.name] = ret_type.name except Exception: continue if success: @@ -171,13 +186,15 @@ def unary_wrapper(z, x): raise UdfParseError('Unable to parse function using Numba') @classmethod - def register_anonymous(cls, func): - return cls._build(func) + def register_anonymous(cls, func, name=None): + if name is None: + name = getattr(func, '__name__', '') + return cls._build(name, func) @classmethod def register_new(cls, name, func): module, funcname = cls._remove_nesting(name) - unary_op = cls._build(func) + unary_op = cls._build(name, func) setattr(module, funcname, unary_op) @@ -188,9 +205,15 @@ class BinaryOp(OpBase): 'trim_from_front': 4, 'num_underscores': 1, 're_exprs': [ - 
re.compile('^GrB_(FIRST|SECOND|MIN|MAX|PLUS|MINUS|TIMES|DIV)_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GrB_(FIRST|SECOND|MIN|MAX|PLUS|MINUS|TIMES|DIV)' + '_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), re.compile('^GrB_(LOR|LAND|LXOR)$'), - re.compile('^GxB_(RMINUS|RDIV|PAIR|ANY|ISEQ|ISNE|ISGT|ISLT|ISLE|ISGE)_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GxB_(RMINUS|RDIV|PAIR|ANY|ISEQ|ISNE|ISGT|ISLT|ISLE|ISGE)' + '_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), ], 're_exprs_return_bool': [ re.compile('^GrB_(EQ|NE|GT|LT|GE|LE)_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), @@ -200,30 +223,42 @@ class BinaryOp(OpBase): all_known_instances = set() @classmethod - def _build(cls, func, name=None): + def _build(cls, name, func): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') success = False new_type_obj = cls(name) + return_types = {} + nt = numba.types for type_, sample_val in dtypes._sample_values.items(): # Check if func can handle this data type try: - ret = func(sample_val, sample_val) - if type(ret) is bool: - ret_type = dtypes.BOOL - elif type_ == 'BOOL': - # type_ == bool, but return type != bool; invalid - continue - else: + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + ret = func(sample_val, sample_val) + ret_type = dtypes.lookup(type(ret)) + if ret_type != type_ and ( + 'INT' in ret_type.name and 'INT' in type_.name + or 'FP' in ret_type.name and 'FP' in type_.name + or type_ == 'UINT64' and ret_type == 'FP64' and return_types.get('INT64') == 'INT64' + ): + # Downcast `ret_type` to `type_`. This is probably what users want most of the time, + # but we can't make a perfect rule. There should be a way for users to be explicit. 
ret_type = type_ + elif type_ == 'BOOL' and ret_type == 'INT64' and return_types.get('INT8') == 'INT8': + ret_type = dtypes.INT8 + + # Numba has a bug and is unable to handle BOOL correctly right now + # See: https://github.com/numba/numba/issues/5395 + # We're relying on coercion behaving correctly here + input_type = dtypes.INT8 if type_ == 'BOOL' else type_ + return_type = dtypes.INT8 if ret_type == 'BOOL' else ret_type - nt = numba.types # JIT the func so it can be used from a cfunc binary_udf = numba.njit(func) # Build wrapper because GraphBLAS wants pointers and void return - wrapper_sig = nt.void(nt.CPointer(ret_type.numba_type), - nt.CPointer(type_.numba_type), - nt.CPointer(type_.numba_type)) + wrapper_sig = nt.void(nt.CPointer(return_type.numba_type), + nt.CPointer(input_type.numba_type), + nt.CPointer(input_type.numba_type)) @numba.cfunc(wrapper_sig, nopython=True) def binary_wrapper(z, x, y): @@ -232,11 +267,12 @@ def binary_wrapper(z, x, y): new_binary = ffi.new('GrB_BinaryOp*') lib.GrB_BinaryOp_new(new_binary, binary_wrapper.cffi, - ret_type.gb_type, type_.gb_type, type_.gb_type) + return_type.gb_type, input_type.gb_type, input_type.gb_type) new_type_obj[type_.name] = new_binary[0] _return_type[new_binary[0]] = ret_type.name cls.all_known_instances.add(new_binary[0]) success = True + return_types[type_.name] = ret_type.name except Exception: continue if success: @@ -245,13 +281,15 @@ def binary_wrapper(z, x, y): raise UdfParseError('Unable to parse function using Numba') @classmethod - def register_anonymous(cls, func): - return cls._build(func) + def register_anonymous(cls, func, name=None): + if name is None: + name = getattr(func, '__name__', '') + return cls._build(name, func) @classmethod def register_new(cls, name, func): module, funcname = cls._remove_nesting(name) - binary_op = cls._build(func) + binary_op = cls._build(name, func) setattr(module, funcname, binary_op) @classmethod @@ -282,23 +320,29 @@ class Monoid(OpBase): 'trim_from_back': 7, 
'num_underscores': 1, 're_exprs': [ - re.compile('^GxB_(MAX|MIN|PLUS|TIMES|ANY)_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)_MONOID$'), + re.compile( + '^GxB_(MAX|MIN|PLUS|TIMES|ANY)' + '_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)_MONOID$' + ), re.compile('^GxB_(EQ|LAND|LOR|LXOR|ANY)_BOOL_MONOID$'), ], } all_known_instances = set() @classmethod - def _build(cls, binaryop, zero, name=None): + def _build(cls, name, binaryop, identity): if type(binaryop) is not BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') - module, funcname = cls._remove_nesting(name) new_type_obj = cls(name) - for type_ in binaryop.types: + if not isinstance(identity, Mapping): + identities = dict.fromkeys(binaryop.types, identity) + else: + identities = identity + for type_, identity in identities.items(): type_ = dtypes.lookup(type_) new_monoid = ffi.new('GrB_Monoid*') func = getattr(lib, f'GrB_Monoid_new_{type_.name}') - zcast = ffi.cast(type_.c_type, zero) + zcast = ffi.cast(type_.c_type, identity) func(new_monoid, binaryop[type_], zcast) new_type_obj[type_.name] = new_monoid[0] ret_type = find_return_type(binaryop[type_]) @@ -307,13 +351,17 @@ def _build(cls, binaryop, zero, name=None): return new_type_obj @classmethod - def register_anonymous(cls, binaryop, zero): - return cls._build(binaryop, zero) + def register_anonymous(cls, binaryop, identity, name=None): + if name is None: + name = getattr(binaryop, 'name', name) + if name is None: + name = getattr(binaryop, '__name__', '') + return cls._build(name, binaryop, identity) @classmethod - def register_new(cls, name, binaryop, zero): + def register_new(cls, name, binaryop, identity): module, funcname = cls._remove_nesting(name) - monoid = cls._build(binaryop, zero, name) + monoid = cls._build(name, binaryop, identity) setattr(module, funcname, monoid) @@ -324,22 +372,29 @@ class Semiring(OpBase): 'trim_from_front': 4, 'num_underscores': 2, 're_exprs': [ - 
re.compile('^GxB_(MIN|MAX|PLUS|TIMES|ANY)_(FIRST|SECOND|PAIR|MIN|MAX|PLUS|MINUS|RMINUS|TIMES|DIV|RDIV|ISEQ|ISNE|ISGT|ISLT|ISGE|ISLE|LOR|LAND|LXOR)_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GxB_(MIN|MAX|PLUS|TIMES|ANY)' + '_(FIRST|SECOND|PAIR|MIN|MAX|PLUS|MINUS|RMINUS|TIMES' + '|DIV|RDIV|ISEQ|ISNE|ISGT|ISLT|ISGE|ISLE|LOR|LAND|LXOR)' + '_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), re.compile('^GxB_(LOR|LAND|LXOR|EQ|ANY)_(FIRST|SECOND|PAIR|LOR|LAND|LXOR|EQ|GT|LT|GE|LE)_BOOL$'), ], 're_exprs_return_bool': [ - re.compile('^GxB_(LOR|LAND|LXOR|EQ|ANY)_(EQ|NE|GT|LT|GE|LE)_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GxB_(LOR|LAND|LXOR|EQ|ANY)_(EQ|NE|GT|LT|GE|LE)' + '_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), ], } all_known_instances = set() @classmethod - def _build(cls, monoid, binaryop, name=None): + def _build(cls, name, monoid, binaryop): if type(monoid) is not Monoid: raise TypeError(f'monoid must be a Monoid, not {type(monoid)}') if type(binaryop) != BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') - module, funcname = cls._remove_nesting(name) new_type_obj = cls(name) for type_ in binaryop.types & monoid.types: type_ = dtypes.lookup(type_) @@ -352,13 +407,21 @@ def _build(cls, monoid, binaryop, name=None): return new_type_obj @classmethod - def register_anonymous(cls, monoid, binaryop): - return cls._build(monoid, binaryop) + def register_anonymous(cls, monoid, binaryop, name=None): + if name is None: + name1 = getattr(monoid, 'name', name) + if name1 is None: + name1 = getattr(monoid, '__name__', '') + name2 = getattr(binaryop, 'name', name) + if name2 is None: + name2 = getattr(binaryop, '__name__', '') + name = f'{name1}_{name2}' + return cls._build(name, monoid, binaryop) @classmethod def register_new(cls, name, monoid, binaryop): module, funcname = cls._remove_nesting(name) - semiring = 
cls._build(monoid, binaryop, name) + semiring = cls._build(name, monoid, binaryop) setattr(module, funcname, semiring) diff --git a/grblas/vector.py b/grblas/vector.py index 6316f8a4d..ede0de3e8 100644 --- a/grblas/vector.py +++ b/grblas/vector.py @@ -10,10 +10,8 @@ def _generate_isclose(rel_tol, abs_tol): # numba will inline the current values of `rel_tol` and `abs_tol` below def isclose(x, y): - # Return 1 or 0 instead of bool because of this numba issue - # https://github.com/numba/numba/issues/5395 - return 1 if abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) else 0 - return BinaryOp.register_anonymous(isclose) + return abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) + return BinaryOp.register_anonymous(isclose, f'isclose(rel_tol={rel_tol}, abs_tol={abs_tol})') class Vector(GbContainer): diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 525da3957..1206f9230 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -16,14 +16,14 @@ build: requirements: build: - {{ compiler('c') }} - + host: - python - - pip - - setuptools - - cffi - - graphblas - - pytest-runner + - pip + - setuptools + - cffi + - graphblas + - pytest-runner run: - python @@ -43,4 +43,4 @@ about: extra: recipe-maintainers: - - jim22k \ No newline at end of file + - jim22k diff --git a/test/test_op.py b/test/test_op.py index 920bc5667..a7b5eeeba 100644 --- a/test/test_op.py +++ b/test/test_op.py @@ -58,7 +58,7 @@ def plus_one(x): assert hasattr(unary, 'plus_one') assert unary.plus_one.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', - 'FP32', 'FP64'} + 'FP32', 'FP64', 'BOOL'} v = Vector.new_from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) v << v.apply(unary.plus_one) result = Vector.new_from_values([0, 1, 3], [2, 3, -3], dtype=dtypes.INT32) @@ -103,7 +103,7 @@ def plus_plus_one(x, y): assert hasattr(monoid, 'plus_plus_one') assert monoid.plus_plus_one.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', 
- 'FP32', 'FP64'} + 'FP32', 'FP64', 'BOOL'} v1 = Vector.new_from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) v2 = Vector.new_from_values([0, 2, 3], [2, 3, 7], dtype=dtypes.INT32) w = v1.ewise_add(v2, monoid.plus_plus_one).new() @@ -149,7 +149,7 @@ def plus_three(x): assert hasattr(unary.incrementers, 'plus_three') assert unary.incrementers.plus_three.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', - 'FP32', 'FP64'} + 'FP32', 'FP64', 'BOOL'} v = Vector.new_from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) v << v.apply(unary.incrementers.plus_three) result = Vector.new_from_values([0, 1, 3], [4, 5, -1], dtype=dtypes.INT32) From 654eddffe3f5a9e47dbcb43972616d5fe3bdd539 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 20 Apr 2020 17:21:06 -0500 Subject: [PATCH 4/9] Add `grblas.numpyops.semiring` and updated logic for registering semirings. Specifically, we need to match the return type of binary functions to the type for the monoid. --- grblas/dtypes.py | 5 +-- grblas/numpyops/__init__.py | 2 +- grblas/numpyops/semiring.py | 77 +++++++++++++++++++++++++++++++++++++ grblas/ops.py | 37 ++++++++---------- setup.cfg | 6 ++- test/conftest.py | 9 +++++ test/test_numpyops.py | 20 ++++++++++ 7 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 grblas/numpyops/semiring.py create mode 100644 test/test_numpyops.py diff --git a/grblas/dtypes.py b/grblas/dtypes.py index 602f48bf9..3f2cc4008 100644 --- a/grblas/dtypes.py +++ b/grblas/dtypes.py @@ -5,14 +5,13 @@ class DataType: - __slots__ = ['name', 'gb_type', 'c_type', 'numba_type', 'numpy_type'] + __slots__ = ['name', 'gb_type', 'c_type', 'numba_type'] def __init__(self, name, gb_type, c_type, numba_type): self.name = name self.gb_type = gb_type self.c_type = c_type self.numba_type = numba_type - self.numpy_type = getattr(np, numba_type.name) def __repr__(self): return self.name @@ -77,12 +76,10 @@ def from_pytype(cls, pytype): _registry[dtype.c_type] = dtype 
_registry[dtype.numba_type] = dtype _registry[dtype.numba_type.name] = dtype - _registry[dtype.numpy_type] = dtype _registry[val.dtype] = dtype _registry[val.dtype.name] = dtype del dtype # Upcast numpy float16 to float32 -_registry[np.float16] = FP32 _registry[np.dtype(np.float16)] = FP32 _registry['float16'] = FP32 diff --git a/grblas/numpyops/__init__.py b/grblas/numpyops/__init__.py index f42c58479..d58f6c53b 100644 --- a/grblas/numpyops/__init__.py +++ b/grblas/numpyops/__init__.py @@ -5,4 +5,4 @@ https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations """ -from . import unary, binary, monoid # noqa +from . import unary, binary, monoid, semiring # noqa diff --git a/grblas/numpyops/semiring.py b/grblas/numpyops/semiring.py new file mode 100644 index 000000000..0b83f24a8 --- /dev/null +++ b/grblas/numpyops/semiring.py @@ -0,0 +1,77 @@ +import itertools +from .. import ops +from . import binary, monoid +from .binary import _binary_names +from .monoid import _monoid_identities + +_semiring_names = { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product(_monoid_identities, _binary_names) +} + +# Remove incompatible combinations +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'equal', 'hypot', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_or', 'logical_xor'}, + {'gcd', 'lcm', 'left_shift', 'right_shift'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'bitwise_and', 'bitwise_or', 'bitwise_xor', 'equal', 'gcd', 'logical_and', 'logical_or', 'logical_xor'}, + {'arctan2', 'copysign', 'divide', 'hypot', 'ldexp', 'logaddexp2', 'logaddexp', 'nextafter', 'true_divide'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'hypot', 'logaddexp', 'logaddexp2'}, + {'bitwise_and', 'bitwise_or', 
'bitwise_xor'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'equal', 'logical_and', 'logical_or', 'logical_xor'}, + {'floor_divide', 'fmod', 'mod', 'power', 'remainder', 'subtract'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'gcd', 'hypot', 'logaddexp', 'logaddexp2'}, + {'equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'} + ) +} + + +def __dir__(): + return list(_semiring_names) + + +def __getattr__(name): + if name not in _semiring_names: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + words = name.split('_') + for i in range(1, len(words)): + monoid_name = '_'.join(words[:i]) + if not hasattr(monoid, monoid_name): + continue + binary_name = '_'.join(words[i:]) + if hasattr(binary, binary_name): + break + semiring = ops.Semiring.register_anonymous( + getattr(monoid, monoid_name), + getattr(binary, binary_name), + name + ) + globals()[name] = semiring + return semiring diff --git a/grblas/ops.py b/grblas/ops.py index 91b3a877f..8ecd70e10 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -132,6 +132,8 @@ class UnaryOp(OpBase): def _build(cls, name, func): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') + if name is None: + name = getattr(func, '__name__', '') success = False new_type_obj = cls(name) return_types = {} @@ -187,8 +189,6 @@ def unary_wrapper(z, x): @classmethod def register_anonymous(cls, func, name=None): - if name is None: - name = getattr(func, '__name__', '') return cls._build(name, func) @classmethod @@ -226,6 +226,8 @@ class BinaryOp(OpBase): def _build(cls, name, func): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') + if name is None: + name = getattr(func, '__name__', '') success = False new_type_obj = 
cls(name) return_types = {} @@ -282,8 +284,6 @@ def binary_wrapper(z, x, y): @classmethod def register_anonymous(cls, func, name=None): - if name is None: - name = getattr(func, '__name__', '') return cls._build(name, func) @classmethod @@ -333,6 +333,8 @@ class Monoid(OpBase): def _build(cls, name, binaryop, identity): if type(binaryop) is not BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') + if name is None: + name = binaryop.name new_type_obj = cls(name) if not isinstance(identity, Mapping): identities = dict.fromkeys(binaryop.types, identity) @@ -352,10 +354,6 @@ def _build(cls, name, binaryop, identity): @classmethod def register_anonymous(cls, binaryop, identity, name=None): - if name is None: - name = getattr(binaryop, 'name', name) - if name is None: - name = getattr(binaryop, '__name__', '') return cls._build(name, binaryop, identity) @classmethod @@ -395,27 +393,24 @@ def _build(cls, name, monoid, binaryop): raise TypeError(f'monoid must be a Monoid, not {type(monoid)}') if type(binaryop) != BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') + if name is None: + name = f'{monoid.name}_{binaryop.name}' new_type_obj = cls(name) - for type_ in binaryop.types & monoid.types: - type_ = dtypes.lookup(type_) + for binary_in, binary_func in binaryop._specific_types.items(): + binary_out = find_return_type(binary_func) + if binary_out not in monoid.types: + continue + binary_out = dtypes.lookup(binary_out) new_semiring = ffi.new('GrB_Semiring*') - lib.GrB_Semiring_new(new_semiring, monoid[type_], binaryop[type_]) - new_type_obj[type_.name] = new_semiring[0] - ret_type = find_return_type(monoid[type_]) + lib.GrB_Semiring_new(new_semiring, monoid[binary_out], binary_func) + new_type_obj[binary_in] = new_semiring[0] + ret_type = find_return_type(monoid[binary_out]) _return_type[new_semiring[0]] = ret_type cls.all_known_instances.add(new_semiring[0]) return new_type_obj @classmethod def 
register_anonymous(cls, monoid, binaryop, name=None): - if name is None: - name1 = getattr(monoid, 'name', name) - if name1 is None: - name1 = getattr(monoid, '__name__', '') - name2 = getattr(binaryop, 'name', name) - if name2 is None: - name2 = getattr(binaryop, '__name__', '') - name = f'{name1}_{name2}' return cls._build(name, monoid, binaryop) @classmethod diff --git a/setup.cfg b/setup.cfg index ea329e4aa..604ff03e6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,8 @@ test=pytest [flake8] -max-line-length = 120 \ No newline at end of file +max-line-length = 120 + +[tool:pytest] +markers: + slow: Skipped unless --runslow passed diff --git a/test/conftest.py b/test/conftest.py index 1f8dde0d5..89e937164 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,7 +1,11 @@ +import pytest + + def pytest_addoption(parser): parser.addoption( "--backend", action="store", default="suitesparse", help="name of a backend in grblas.backends" ) + parser.addoption("--runslow", action="store_true", help="run slow tests") def pytest_configure(config): @@ -9,3 +13,8 @@ def pytest_configure(config): import grblas grblas.init(backend) print(f'Running tests with "{backend}" backend') + + +def pytest_runtest_setup(item): + if "slow" in item.keywords and not item.config.getoption("--runslow"): + pytest.skip("need --runslow option to run") diff --git a/test/test_numpyops.py b/test/test_numpyops.py new file mode 100644 index 000000000..5e69a8164 --- /dev/null +++ b/test/test_numpyops.py @@ -0,0 +1,20 @@ +import pytest +import itertools +import grblas +from grblas.numpyops import binary as npbinary, monoid as npmonoid, semiring as npsemiring + + +@pytest.mark.slow +def test_npsemiring(): + # This is a very slow test, since it forces creation of all numpy binary, monoid, and semiring objects + for monoid_name, binary_name in itertools.product( + sorted(npmonoid._monoid_identities), + sorted(npbinary._binary_names) + ): + monoid = getattr(npmonoid, monoid_name) + binary = 
getattr(npbinary, binary_name) + semiring = grblas.ops.Semiring.register_anonymous(monoid, binary) + if len(semiring.types) == 0: + assert not hasattr(npsemiring, semiring.name) + else: + assert hasattr(npsemiring, semiring.name) From d2a290f13fce35c823d635bdac17ec020666e30c Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 20 Apr 2020 22:26:14 -0500 Subject: [PATCH 5/9] Datatypes are no longer hashable. It's risky to have flexible equality *and* hashability. I think it's best to choose one; in this case, flexible equality is nicer. --- grblas/dtypes.py | 29 +++++----- grblas/numpyops/monoid.py | 109 ++++++++++++++++++-------------------- grblas/ops.py | 2 + 3 files changed, 68 insertions(+), 72 deletions(-) diff --git a/grblas/dtypes.py b/grblas/dtypes.py index 3f2cc4008..db2e3d392 100644 --- a/grblas/dtypes.py +++ b/grblas/dtypes.py @@ -16,9 +16,6 @@ def __init__(self, name, gb_type, c_type, numba_type): def __repr__(self): return self.name - def __hash__(self): - return hash((self.name, self.c_type)) - def __eq__(self, other): if isinstance(other, DataType): return self.gb_type == other.gb_type @@ -55,30 +52,30 @@ def from_pytype(cls, pytype): # Used for testing user-defined functions _sample_values = { - INT8: np.int8(1), - UINT8: np.uint8(1), - INT16: np.int16(1), - UINT16: np.uint16(1), - INT32: np.int32(1), - UINT32: np.uint32(1), - INT64: np.int64(1), - UINT64: np.uint64(1), - FP32: np.float32(0.5), - FP64: np.float64(0.5), - BOOL: np.bool_(True), + INT8.name: np.int8(1), + UINT8.name: np.uint8(1), + INT16.name: np.int16(1), + UINT16.name: np.uint16(1), + INT32.name: np.int32(1), + UINT32.name: np.uint32(1), + INT64.name: np.int64(1), + UINT64.name: np.uint64(1), + FP32.name: np.float32(0.5), + FP64.name: np.float64(0.5), + BOOL.name: np.bool_(True), } # Create register to easily lookup types by name, gb_type, or c_type _registry = {} -for dtype, val in _sample_values.items(): +for dtype in [BOOL, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, 
FP32, FP64]: _registry[dtype.name] = dtype _registry[dtype.gb_type] = dtype _registry[dtype.c_type] = dtype _registry[dtype.numba_type] = dtype _registry[dtype.numba_type.name] = dtype + val = _sample_values[dtype.name] _registry[val.dtype] = dtype _registry[val.dtype.name] = dtype -del dtype # Upcast numpy float16 to float32 _registry[np.dtype(np.float16)] = FP32 _registry['float16'] = FP32 diff --git a/grblas/numpyops/monoid.py b/grblas/numpyops/monoid.py index a7d04edd4..dfecc33e3 100644 --- a/grblas/numpyops/monoid.py +++ b/grblas/numpyops/monoid.py @@ -1,13 +1,10 @@ import numpy as np -from .. import dtypes, ops +from .. import ops from . import binary -_float_dtypes = {dtypes.FP32, dtypes.FP64} -_int_dtypes = { - dtypes.INT8, dtypes.UINT8, dtypes.INT16, dtypes.UINT16, - dtypes.INT32, dtypes.UINT32, dtypes.INT64, dtypes.UINT64, -} -_bool_int_dtypes = _int_dtypes | {dtypes.BOOL} +_float_dtypes = {'FP32', 'FP64'} +_int_dtypes = {'INT8', 'UINT8', 'INT16', 'UINT16', 'INT32', 'UINT32', 'INT64', 'UINT64'} +_bool_int_dtypes = _int_dtypes | {'BOOL'} _monoid_identities = { # Math operations @@ -21,66 +18,66 @@ 'hypot': dict.fromkeys(_float_dtypes, 0.), # Bit-twiddling functions - 'bitwise_and': {dtype: True if dtype is dtypes.BOOL else -1 for dtype in _bool_int_dtypes}, + 'bitwise_and': {dtype: True if dtype == 'BOOL' else -1 for dtype in _bool_int_dtypes}, 'bitwise_or': dict.fromkeys(_bool_int_dtypes, 0), 'bitwise_xor': dict.fromkeys(_bool_int_dtypes, 0), # Comparison functions - 'equal': {dtypes.BOOL: True}, - 'logical_and': {dtypes.BOOL: True}, - 'logical_or': {dtypes.BOOL: True}, - 'logical_xor': {dtypes.BOOL: False}, + 'equal': {'BOOL': True}, + 'logical_and': {'BOOL': True}, + 'logical_or': {'BOOL': True}, + 'logical_xor': {'BOOL': False}, 'maximum': { - dtypes.BOOL: False, - dtypes.INT8: np.iinfo(np.int8).min, - dtypes.UINT8: 0, - dtypes.INT16: np.iinfo(np.int16).min, - dtypes.UINT16: 0, - dtypes.INT32: np.iinfo(np.int32).min, - dtypes.UINT32: 0, - dtypes.INT64: 
np.iinfo(np.int64).min, - dtypes.UINT64: 0, - dtypes.FP32: -np.inf, - dtypes.FP64: -np.inf, + 'BOOL': False, + 'INT8': np.iinfo(np.int8).min, + 'UINT8': 0, + 'INT16': np.iinfo(np.int16).min, + 'UINT16': 0, + 'INT32': np.iinfo(np.int32).min, + 'UINT32': 0, + 'INT64': np.iinfo(np.int64).min, + 'UINT64': 0, + 'FP32': -np.inf, + 'FP64': -np.inf, }, 'minimum': { - dtypes.BOOL: True, - dtypes.INT8: np.iinfo(np.int8).max, - dtypes.UINT8: np.iinfo(np.uint8).max, - dtypes.INT16: np.iinfo(np.int16).max, - dtypes.UINT16: np.iinfo(np.uint16).max, - dtypes.INT32: np.iinfo(np.int32).max, - dtypes.UINT32: np.iinfo(np.uint32).max, - dtypes.INT64: np.iinfo(np.int64).max, - dtypes.UINT64: np.iinfo(np.uint64).max, - dtypes.FP32: np.inf, - dtypes.FP64: np.inf, + 'BOOL': True, + 'INT8': np.iinfo(np.int8).max, + 'UINT8': np.iinfo(np.uint8).max, + 'INT16': np.iinfo(np.int16).max, + 'UINT16': np.iinfo(np.uint16).max, + 'INT32': np.iinfo(np.int32).max, + 'UINT32': np.iinfo(np.uint32).max, + 'INT64': np.iinfo(np.int64).max, + 'UINT64': np.iinfo(np.uint64).max, + 'FP32': np.inf, + 'FP64': np.inf, }, 'fmax': { - dtypes.BOOL: False, - dtypes.INT8: np.iinfo(np.int8).min, - dtypes.UINT8: 0, - dtypes.INT16: np.iinfo(np.int8).min, - dtypes.UINT16: 0, - dtypes.INT32: np.iinfo(np.int8).min, - dtypes.UINT32: 0, - dtypes.INT64: np.iinfo(np.int8).min, - dtypes.UINT64: 0, - dtypes.FP32: -np.inf, # or np.nan? - dtypes.FP64: -np.inf, # or np.nan? + 'BOOL': False, + 'INT8': np.iinfo(np.int8).min, + 'UINT8': 0, + 'INT16': np.iinfo(np.int8).min, + 'UINT16': 0, + 'INT32': np.iinfo(np.int8).min, + 'UINT32': 0, + 'INT64': np.iinfo(np.int8).min, + 'UINT64': 0, + 'FP32': -np.inf, # or np.nan? + 'FP64': -np.inf, # or np.nan? 
}, 'fmin': { - dtypes.BOOL: True, - dtypes.INT8: np.iinfo(np.int8).max, - dtypes.UINT8: np.iinfo(np.uint8).max, - dtypes.INT16: np.iinfo(np.int16).max, - dtypes.UINT16: np.iinfo(np.uint16).max, - dtypes.INT32: np.iinfo(np.int32).max, - dtypes.UINT32: np.iinfo(np.uint32).max, - dtypes.INT64: np.iinfo(np.int64).max, - dtypes.UINT64: np.iinfo(np.uint64).max, - dtypes.FP32: np.inf, # or np.nan? - dtypes.FP64: np.inf, # or np.nan? + 'BOOL': True, + 'INT8': np.iinfo(np.int8).max, + 'UINT8': np.iinfo(np.uint8).max, + 'INT16': np.iinfo(np.int16).max, + 'UINT16': np.iinfo(np.uint16).max, + 'INT32': np.iinfo(np.int32).max, + 'UINT32': np.iinfo(np.uint32).max, + 'INT64': np.iinfo(np.int64).max, + 'UINT64': np.iinfo(np.uint64).max, + 'FP32': np.inf, # or np.nan? + 'FP64': np.inf, # or np.nan? }, } diff --git a/grblas/ops.py b/grblas/ops.py index 8ecd70e10..37d469e02 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -139,6 +139,7 @@ def _build(cls, name, func): return_types = {} nt = numba.types for type_, sample_val in dtypes._sample_values.items(): + type_ = dtypes.lookup(type_) # Check if func can handle this data type try: with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): @@ -233,6 +234,7 @@ def _build(cls, name, func): return_types = {} nt = numba.types for type_, sample_val in dtypes._sample_values.items(): + type_ = dtypes.lookup(type_) # Check if func can handle this data type try: with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): From 0bb66a6570069955cb03faf950ecd80d71e945a2 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 20 Apr 2020 22:57:07 -0500 Subject: [PATCH 6/9] Move e.g. `npops.binary` to `binary.numpy`. 
--- grblas/__init__.py | 5 ++-- grblas/{binary.py => binary/__init__.py} | 0 .../{numpyops/binary.py => binary/numpy.py} | 5 ++-- grblas/{monoid.py => monoid/__init__.py} | 0 .../{numpyops/monoid.py => monoid/numpy.py} | 10 ++++---- grblas/ops.py | 3 ++- grblas/{semiring.py => semiring/__init__.py} | 0 .../semiring.py => semiring/numpy.py} | 22 ++++++++--------- grblas/{unary.py => unary/__init__.py} | 0 grblas/{numpyops/unary.py => unary/numpy.py} | 5 ++-- test/test_numpyops.py | 7 ++++-- test/test_op.py | 24 +++++++++---------- 12 files changed, 39 insertions(+), 42 deletions(-) rename grblas/{binary.py => binary/__init__.py} (100%) rename grblas/{numpyops/binary.py => binary/numpy.py} (89%) rename grblas/{monoid.py => monoid/__init__.py} (100%) rename grblas/{numpyops/monoid.py => monoid/numpy.py} (92%) rename grblas/{semiring.py => semiring/__init__.py} (100%) rename grblas/{numpyops/semiring.py => semiring/numpy.py} (84%) rename grblas/{unary.py => unary/__init__.py} (100%) rename grblas/{numpyops/unary.py => unary/numpy.py} (90%) diff --git a/grblas/__init__.py b/grblas/__init__.py index 7b36aa2d4..29e337d60 100644 --- a/grblas/__init__.py +++ b/grblas/__init__.py @@ -4,7 +4,7 @@ _init_params = None _SPECIAL_ATTRS = ["lib", "ffi", "Matrix", "Vector", "Scalar", "base", "exceptions", "matrix", "ops", "scalar", "vector" - "unary", "binary", "monoid", "semiring", "numpyops"] + "unary", "binary", "monoid", "semiring"] def __getattr__(name): @@ -31,7 +31,7 @@ def init(backend="suitesparse", blocking=True): def _init(backend, blocking, automatic=False): global _init_params, lib, ffi, Matrix, Vector, Scalar global base, exceptions, matrix, ops, scalar, vector - global unary, binary, monoid, semiring, numpyops + global unary, binary, monoid, semiring passed_params = dict(backend=backend, blocking=blocking, automatic=automatic) if _init_params is None: @@ -68,7 +68,6 @@ def _init(backend, blocking, automatic=False): matrix = importlib.import_module(f".matrix", 
__name__) vector = importlib.import_module(f".vector", __name__) scalar = importlib.import_module(f".scalar", __name__) - numpyops = importlib.import_module(f".numpyops", __name__) from .matrix import Matrix from .vector import Vector from .scalar import Scalar diff --git a/grblas/binary.py b/grblas/binary/__init__.py similarity index 100% rename from grblas/binary.py rename to grblas/binary/__init__.py diff --git a/grblas/numpyops/binary.py b/grblas/binary/numpy.py similarity index 89% rename from grblas/numpyops/binary.py rename to grblas/binary/numpy.py index 5d8b665ad..6aff8430c 100644 --- a/grblas/numpyops/binary.py +++ b/grblas/binary/numpy.py @@ -59,6 +59,5 @@ def __getattr__(name): if name not in _binary_names: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") numpy_func = getattr(np, name) - func = ops.BinaryOp.register_anonymous(lambda x, y: numpy_func(x, y), name) - globals()[name] = func - return func + ops.BinaryOp.register_new(f'numpy.{name}', lambda x, y: numpy_func(x, y)) + return globals()[name] diff --git a/grblas/monoid.py b/grblas/monoid/__init__.py similarity index 100% rename from grblas/monoid.py rename to grblas/monoid/__init__.py diff --git a/grblas/numpyops/monoid.py b/grblas/monoid/numpy.py similarity index 92% rename from grblas/numpyops/monoid.py rename to grblas/monoid/numpy.py index dfecc33e3..eba0450cf 100644 --- a/grblas/numpyops/monoid.py +++ b/grblas/monoid/numpy.py @@ -1,6 +1,5 @@ import numpy as np -from .. import ops -from . import binary +from .. 
import ops, binary _float_dtypes = {'FP32', 'FP64'} _int_dtypes = {'INT8', 'UINT8', 'INT16', 'UINT16', 'INT32', 'UINT32', 'INT64', 'UINT64'} @@ -89,7 +88,6 @@ def __dir__(): def __getattr__(name): if name not in _monoid_identities: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - func = getattr(binary, name) - monoid = ops.Monoid.register_anonymous(func, _monoid_identities[name], name) - globals()[name] = monoid - return monoid + func = getattr(binary.numpy, name) + ops.Monoid.register_new(f'numpy.{name}', func, _monoid_identities[name]) + return globals()[name] diff --git a/grblas/ops.py b/grblas/ops.py index 37d469e02..5bff960e7 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -1,4 +1,5 @@ import re +import types import numpy as np import numba from collections.abc import Mapping @@ -67,7 +68,7 @@ def _remove_nesting(cls, funcname): setattr(module, folder, OpPath(module, folder)) module = getattr(module, folder) modname = f'{modname}.{folder}' - if type(module) is not OpPath: + if not isinstance(module, (OpPath, types.ModuleType)): raise AttributeError(f'{modname} is already defined. Cannot use as a nested path.') return module, funcname diff --git a/grblas/semiring.py b/grblas/semiring/__init__.py similarity index 100% rename from grblas/semiring.py rename to grblas/semiring/__init__.py diff --git a/grblas/numpyops/semiring.py b/grblas/semiring/numpy.py similarity index 84% rename from grblas/numpyops/semiring.py rename to grblas/semiring/numpy.py index 0b83f24a8..e19735943 100644 --- a/grblas/numpyops/semiring.py +++ b/grblas/semiring/numpy.py @@ -1,8 +1,7 @@ import itertools -from .. import ops -from . import binary, monoid -from .binary import _binary_names -from .monoid import _monoid_identities +from .. 
import ops, binary, monoid +from ..binary.numpy import _binary_names +from ..monoid.numpy import _monoid_identities _semiring_names = { f'{monoid_name}_{binary_name}' @@ -63,15 +62,14 @@ def __getattr__(name): words = name.split('_') for i in range(1, len(words)): monoid_name = '_'.join(words[:i]) - if not hasattr(monoid, monoid_name): + if not hasattr(monoid.numpy, monoid_name): continue binary_name = '_'.join(words[i:]) - if hasattr(binary, binary_name): + if hasattr(binary.numpy, binary_name): break - semiring = ops.Semiring.register_anonymous( - getattr(monoid, monoid_name), - getattr(binary, binary_name), - name + ops.Semiring.register_new( + f'numpy.{name}', + getattr(monoid.numpy, monoid_name), + getattr(binary.numpy, binary_name), ) - globals()[name] = semiring - return semiring + return globals()[name] diff --git a/grblas/unary.py b/grblas/unary/__init__.py similarity index 100% rename from grblas/unary.py rename to grblas/unary/__init__.py diff --git a/grblas/numpyops/unary.py b/grblas/unary/numpy.py similarity index 90% rename from grblas/numpyops/unary.py rename to grblas/unary/numpy.py index 7bd6ebde1..cb8d34ecc 100644 --- a/grblas/numpyops/unary.py +++ b/grblas/unary/numpy.py @@ -67,6 +67,5 @@ def __getattr__(name): if name not in _unary_names: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") numpy_func = getattr(np, name) - func = ops.UnaryOp.register_anonymous(lambda x: numpy_func(x)) - globals()[name] = func - return func + ops.UnaryOp.register_new(f'numpy.{name}', lambda x: numpy_func(x)) + return globals()[name] diff --git a/test/test_numpyops.py b/test/test_numpyops.py index 5e69a8164..f9d42c6d8 100644 --- a/test/test_numpyops.py +++ b/test/test_numpyops.py @@ -1,7 +1,9 @@ import pytest import itertools import grblas -from grblas.numpyops import binary as npbinary, monoid as npmonoid, semiring as npsemiring +import grblas.binary.numpy as npbinary +import grblas.monoid.numpy as npmonoid +import grblas.semiring.numpy as 
npsemiring @pytest.mark.slow @@ -13,7 +15,8 @@ def test_npsemiring(): ): monoid = getattr(npmonoid, monoid_name) binary = getattr(npbinary, binary_name) - semiring = grblas.ops.Semiring.register_anonymous(monoid, binary) + name = monoid.name.split(".")[-1] + "_" + binary.name.split(".")[-1] + semiring = grblas.ops.Semiring.register_anonymous(monoid, binary, name) if len(semiring.types) == 0: assert not hasattr(npsemiring, semiring.name) else: diff --git a/test/test_op.py b/test/test_op.py index a7b5eeeba..546d4f3aa 100644 --- a/test/test_op.py +++ b/test/test_op.py @@ -66,18 +66,18 @@ def plus_one(x): def test_unaryop_udf_bool_result(): - pytest.xfail('not sure why numba has trouble compiling this') - # def is_positive(x): - # return x > 0 - # unary.register_new('is_positive', is_positive) - # assert hasattr(UnaryOp, 'is_positive') - # assert unary.is_positive.types == {'INT8', 'INT16', 'INT32', 'INT64', - # 'UINT8', 'UINT16', 'UINT32', 'UINT64', - # 'FP32', 'FP64'} - # v = Vector.new_from_values([0,1,3], [1,2,-4], dtype=dtypes.INT32) - # w = v.apply(unary.is_positive).new() - # result = Vector.new_from_values([0,1,3], [True,True,False], dtype=dtypes.BOOL) - # assert v == result + # numba has trouble compiling this, but we have a work-around + def is_positive(x): + return x > 0 + UnaryOp.register_new('is_positive', is_positive) + assert hasattr(unary, 'is_positive') + assert unary.is_positive.types == {'INT8', 'INT16', 'INT32', 'INT64', + 'UINT8', 'UINT16', 'UINT32', 'UINT64', + 'FP32', 'FP64', 'BOOL'} + v = Vector.new_from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) + w = v.apply(unary.is_positive).new() + result = Vector.new_from_values([0, 1, 3], [True, True, False], dtype=dtypes.BOOL) + assert w.isequal(result) def test_binaryop_udf(): From d841090bf0836929a29471455d7f156bdd27b7b8 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 20 Apr 2020 23:18:25 -0500 Subject: [PATCH 7/9] Expose `grblas.binary.numpy` (and others) --- grblas/__init__.py | 7 ++++++- 
grblas/numpyops/__init__.py | 8 -------- grblas/vector.py | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) delete mode 100644 grblas/numpyops/__init__.py diff --git a/grblas/__init__.py b/grblas/__init__.py index 29e337d60..4c61fa7a6 100644 --- a/grblas/__init__.py +++ b/grblas/__init__.py @@ -3,7 +3,7 @@ _init_params = None _SPECIAL_ATTRS = ["lib", "ffi", "Matrix", "Vector", "Scalar", - "base", "exceptions", "matrix", "ops", "scalar", "vector" + "base", "exceptions", "matrix", "ops", "scalar", "vector", "unary", "binary", "monoid", "semiring"] @@ -76,3 +76,8 @@ def _init(backend, blocking, automatic=False): ops.BinaryOp._initialize() ops.Monoid._initialize() ops.Semiring._initialize() + + from .unary import numpy # noqa + from .binary import numpy # noqa + from .monoid import numpy # noqa + from .semiring import numpy # noqa diff --git a/grblas/numpyops/__init__.py b/grblas/numpyops/__init__.py deleted file mode 100644 index d58f6c53b..000000000 --- a/grblas/numpyops/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" Create UDFs of numpy functions supported by numba. - -See list of numpy ufuncs supported by numpy here: - -https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations - -""" -from . 
import unary, binary, monoid, semiring # noqa diff --git a/grblas/vector.py b/grblas/vector.py index ede0de3e8..ab090f44e 100644 --- a/grblas/vector.py +++ b/grblas/vector.py @@ -11,7 +11,7 @@ def _generate_isclose(rel_tol, abs_tol): # numba will inline the current values of `rel_tol` and `abs_tol` below def isclose(x, y): return abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) - return BinaryOp.register_anonymous(isclose, f'isclose(rel_tol={rel_tol}, abs_tol={abs_tol})') + return BinaryOp.register_anonymous(isclose, f'isclose(rel_tol={rel_tol:g}, abs_tol={abs_tol:g})') class Vector(GbContainer): From ba1f8b73c096ec01645af9b4e455628c56c1d174 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 20 Apr 2020 23:29:29 -0500 Subject: [PATCH 8/9] point to supported numba ufuncs --- grblas/binary/numpy.py | 7 +++++++ grblas/monoid/numpy.py | 7 +++++++ grblas/semiring/numpy.py | 7 +++++++ grblas/unary/numpy.py | 7 +++++++ 4 files changed, 28 insertions(+) diff --git a/grblas/binary/numpy.py b/grblas/binary/numpy.py index 6aff8430c..32cd9d88b 100644 --- a/grblas/binary/numpy.py +++ b/grblas/binary/numpy.py @@ -1,3 +1,10 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numba here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" import numpy as np from .. import ops diff --git a/grblas/monoid/numpy.py b/grblas/monoid/numpy.py index eba0450cf..c0a90f85b 100644 --- a/grblas/monoid/numpy.py +++ b/grblas/monoid/numpy.py @@ -1,3 +1,10 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numba here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" import numpy as np from ..
import ops, binary diff --git a/grblas/semiring/numpy.py b/grblas/semiring/numpy.py index e19735943..99873e667 100644 --- a/grblas/semiring/numpy.py +++ b/grblas/semiring/numpy.py @@ -1,3 +1,10 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numba here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" import itertools from .. import ops, binary, monoid from ..binary.numpy import _binary_names diff --git a/grblas/unary/numpy.py b/grblas/unary/numpy.py index cb8d34ecc..a3dcc88e7 100644 --- a/grblas/unary/numpy.py +++ b/grblas/unary/numpy.py @@ -1,3 +1,10 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numba here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" import numpy as np from .. import ops From ad8b74cca3443a4ce4a1f2784aab9dcde8214a5d Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 22 Apr 2020 23:54:45 -0500 Subject: [PATCH 9/9] Tests for numpy/numba udfs! Whew. This revealed that our hack to support boolean unary and binary functions is not compatible with monoids and semirings. Oh well. All bool operations in `unary.numpy` and `binary.numpy` should work "as expected", which, admittedly, is a little weird at times. Bool to float operations go to FP32. --- .travis.yml | 2 +- grblas/monoid/numpy.py | 8 +- grblas/ops.py | 6 +- grblas/semiring/numpy.py | 20 +++++ grblas/unary/numpy.py | 8 +- grblas/vector.py | 2 +- test/test_numpyops.py | 178 ++++++++++++++++++++++++++++++++++++++- test/test_op.py | 3 +- test/test_vector.py | 4 + 9 files changed, 218 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 884e533f3..bac79b755 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ install: - pip install -e . 
script: - - pytest + - pytest --runslow notifications: email: false diff --git a/grblas/monoid/numpy.py b/grblas/monoid/numpy.py index c0a90f85b..6d0d1196b 100644 --- a/grblas/monoid/numpy.py +++ b/grblas/monoid/numpy.py @@ -29,10 +29,10 @@ 'bitwise_xor': dict.fromkeys(_bool_int_dtypes, 0), # Comparison functions - 'equal': {'BOOL': True}, - 'logical_and': {'BOOL': True}, - 'logical_or': {'BOOL': True}, - 'logical_xor': {'BOOL': False}, + # 'equal': {'BOOL': True}, # Not yet supported + # 'logical_and': {'BOOL': True}, # Not yet supported + # 'logical_or': {'BOOL': True}, # Not yet supported + # 'logical_xor': {'BOOL': False}, # Not yet supported 'maximum': { 'BOOL': False, 'INT8': np.iinfo(np.int8).min, diff --git a/grblas/ops.py b/grblas/ops.py index 5bff960e7..680870fe8 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -344,6 +344,8 @@ def _build(cls, name, binaryop, identity): else: identities = identity for type_, identity in identities.items(): + if type_ == 'BOOL': # Not yet supported + continue type_ = dtypes.lookup(type_) new_monoid = ffi.new('GrB_Monoid*') func = getattr(lib, f'GrB_Monoid_new_{type_.name}') @@ -401,7 +403,9 @@ def _build(cls, name, monoid, binaryop): new_type_obj = cls(name) for binary_in, binary_func in binaryop._specific_types.items(): binary_out = find_return_type(binary_func) - if binary_out not in monoid.types: + # Unfortunately, we can't have user-defined monoids over bools yet + # because numba can't compile correctly. 
+ if binary_out not in monoid.types or binary_out == 'BOOL': continue binary_out = dtypes.lookup(binary_out) new_semiring = ffi.new('GrB_Semiring*') diff --git a/grblas/semiring/numpy.py b/grblas/semiring/numpy.py index 99873e667..959752b32 100644 --- a/grblas/semiring/numpy.py +++ b/grblas/semiring/numpy.py @@ -57,6 +57,26 @@ 'logical_and', 'logical_or', 'logical_xor', 'not_equal'} ) } +# XXX: we can't handle any semirings with bool at the moment +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'add', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'fmax', 'fmin', 'maximum', 'minimum', 'multiply'}, + {'equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'equal', 'logical_and', 'logical_or', 'logical_xor'}, + {'add', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'equal', 'fmax', 'fmin', + 'greater', 'greater_equal', 'less', 'less_equal', 'logical_and', 'logical_or', + 'logical_xor', 'maximum', 'minimum', 'multiply', 'not_equal'} + ) +} def __dir__(): diff --git a/grblas/unary/numpy.py b/grblas/unary/numpy.py index a3dcc88e7..7386e2882 100644 --- a/grblas/unary/numpy.py +++ b/grblas/unary/numpy.py @@ -6,7 +6,7 @@ """ import numpy as np -from .. import ops +from .. 
import ops, unary _unary_names = { # Math operations @@ -75,4 +75,8 @@ def __getattr__(name): raise AttributeError(f"module {__name__!r} has no attribute {name!r}") numpy_func = getattr(np, name) ops.UnaryOp.register_new(f'numpy.{name}', lambda x: numpy_func(x)) - return globals()[name] + rv = globals()[name] + if name in {'invert', 'bitwise_not'}: + # numba has difficulty compiling with bool dtypes, so fix our hack + rv._specific_types['BOOL'] = unary.numpy.logical_not._specific_types['BOOL'] + return rv diff --git a/grblas/vector.py b/grblas/vector.py index e03026b20..031b7843e 100644 --- a/grblas/vector.py +++ b/grblas/vector.py @@ -10,7 +10,7 @@ def _generate_isclose(rel_tol, abs_tol): # numba will inline the current values of `rel_tol` and `abs_tol` below def isclose(x, y): - return abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) + return x == y or abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) return BinaryOp.register_anonymous(isclose, f'isclose(rel_tol={rel_tol:g}, abs_tol={abs_tol:g})') diff --git a/test/test_numpyops.py b/test/test_numpyops.py index f9d42c6d8..5edab54df 100644 --- a/test/test_numpyops.py +++ b/test/test_numpyops.py @@ -1,14 +1,186 @@ +# These tests are very slow, since they force creation of all numpy unary, binary, monoid, and semiring objects import pytest +import numpy as np import itertools import grblas +import grblas.unary.numpy as npunary import grblas.binary.numpy as npbinary import grblas.monoid.numpy as npmonoid import grblas.semiring.numpy as npsemiring +@pytest.mark.slow +def test_npunary(): + L = list(range(5)) + data = [ + [grblas.Vector.from_values([0, 1], [True, False]), np.array([True, False])], + [grblas.Vector.from_values(L, L), np.array(L, dtype=int)], + [grblas.Vector.from_values(L, L, dtype='float64'), np.array(L, dtype=np.float64)], + ] + blacklist = {} + isclose = grblas.vector._generate_isclose(1e-7, 0) + for gb_input, np_input in data: + for unary_name in sorted(npunary._unary_names): + op = 
getattr(npunary, unary_name) + if gb_input.dtype.name not in op.types or unary_name in blacklist.get(gb_input.dtype.name, ()): + continue + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + gb_result = gb_input.apply(op).new() + if gb_input.dtype == 'BOOL' and gb_result.dtype == 'FP32': + np_result = getattr(np, unary_name)(np_input, dtype='float32') + compare_op = isclose + else: + np_result = getattr(np, unary_name)(np_input) + compare_op = npbinary.equal + np_result = grblas.Vector.from_values(list(range(np_input.size)), list(np_result), dtype=gb_result.dtype) + assert gb_result.nvals == np_result.size + match = gb_result.ewise_mult(np_result, compare_op).new() + match(accum=grblas.binary.lor) << gb_result.apply(npunary.isnan) + compare = match.reduce(grblas.monoid.land).value + if not compare: + print(unary_name, gb_input.dtype) + print(gb_result.show()) + print(np_result.show()) + assert compare + + +@pytest.mark.slow +def test_npbinary(): + values1 = [0, 0, 1, 1, 2, 5] + values2 = [0, 1, 0, 1, 3, 8] + index = list(range(len(values1))) + data = [ + [ + [ + grblas.Vector.from_values(index, values1), + grblas.Vector.from_values(index, values2), + ], + [ + np.array(values1, dtype=int), + np.array(values2, dtype=int), + ], + ], + [ + [ + grblas.Vector.from_values(index, values1, dtype='float64'), + grblas.Vector.from_values(index, values2, dtype='float64'), + ], + [ + np.array(values1, dtype=np.float64), + np.array(values2, dtype=np.float64), + ], + ], + [ + [ + grblas.Vector.from_values([0, 1, 2, 3], [True, False, True, False]), + grblas.Vector.from_values([0, 1, 2, 3], [True, True, False, False]), + ], + [ + np.array([True, False, True, False]), + np.array([True, True, False, False]), + ], + ], + ] + blacklist = { + 'FP64': { + 'floor_divide', # numba/numpy difference for 1.0 / 0.0 + }, + } + isclose = grblas.vector._generate_isclose(1e-7, 0) + for (gb_left, gb_right), (np_left, np_right) in data: + for binary_name in 
sorted(npbinary._binary_names): + op = getattr(npbinary, binary_name) + if gb_left.dtype.name not in op.types or binary_name in blacklist.get(gb_left.dtype.name, ()): + continue + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + gb_result = gb_left.ewise_mult(gb_right, op).new() + if gb_left.dtype == 'BOOL' and gb_result.dtype == 'FP32': + np_result = getattr(np, binary_name)(np_left, np_right, dtype='float32') + compare_op = isclose + else: + np_result = getattr(np, binary_name)(np_left, np_right) + compare_op = npbinary.equal + np_result = grblas.Vector.from_values(list(range(np_left.size)), list(np_result), dtype=gb_result.dtype) + assert gb_result.nvals == np_result.size + match = gb_result.ewise_mult(np_result, compare_op).new() + match(accum=grblas.binary.lor) << gb_result.apply(npunary.isnan) + compare = match.reduce(grblas.monoid.land).value + if not compare: + print(binary_name, gb_left.dtype) + print(gb_result.show()) + print(np_result.show()) + assert compare + + +@pytest.mark.slow +def test_npmonoid(): + values1 = [0, 0, 1, 1, 2, 5] + values2 = [0, 1, 0, 1, 3, 8] + index = list(range(len(values1))) + data = [ + [ + [ + grblas.Vector.from_values(index, values1), + grblas.Vector.from_values(index, values2), + ], + [ + np.array(values1, dtype=int), + np.array(values2, dtype=int), + ], + ], + [ + [ + grblas.Vector.from_values(index, values1, dtype='float64'), + grblas.Vector.from_values(index, values2, dtype='float64'), + ], + [ + np.array(values1, dtype=np.float64), + np.array(values2, dtype=np.float64), + ], + ], + [ + [ + grblas.Vector.from_values([0, 1, 2, 3], [True, False, True, False]), + grblas.Vector.from_values([0, 1, 2, 3], [True, True, False, False]), + ], + [ + np.array([True, False, True, False]), + np.array([True, True, False, False]), + ], + ], + ] + blacklist = {} + for (gb_left, gb_right), (np_left, np_right) in data: + for binary_name in sorted(npmonoid._monoid_identities): + op = getattr(npmonoid, 
binary_name) + assert len(op.types) > 0, op.name + if gb_left.dtype.name not in op.types or binary_name in blacklist.get(gb_left.dtype.name, ()): + continue + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + gb_result = gb_left.ewise_mult(gb_right, op).new() + np_result = getattr(np, binary_name)(np_left, np_right) + np_result = grblas.Vector.from_values(list(range(np_left.size)), list(np_result), dtype=gb_result.dtype) + assert gb_result.nvals == np_result.size + match = gb_result.ewise_mult(np_result, npbinary.equal).new() + match(accum=grblas.binary.lor) << gb_result.apply(npunary.isnan) + compare = match.reduce(grblas.monoid.land).value + if not compare: + print(binary_name, gb_left.dtype) + print(gb_result.show()) + print(np_result.show()) + assert compare + + gb_result = gb_left.reduce(op).new() + np_result = getattr(np, binary_name).reduce(np_left) + assert gb_result.value == np_result + + gb_result = gb_right.reduce(op).new() + np_result = getattr(np, binary_name).reduce(np_right) + assert gb_result.value == np_result + + @pytest.mark.slow def test_npsemiring(): - # This is a very slow test, since it forces creation of all numpy binary, monoid, and semiring objects for monoid_name, binary_name in itertools.product( sorted(npmonoid._monoid_identities), sorted(npbinary._binary_names) @@ -18,6 +190,6 @@ def test_npsemiring(): name = monoid.name.split(".")[-1] + "_" + binary.name.split(".")[-1] semiring = grblas.ops.Semiring.register_anonymous(monoid, binary, name) if len(semiring.types) == 0: - assert not hasattr(npsemiring, semiring.name) + assert not hasattr(npsemiring, semiring.name), name else: - assert hasattr(npsemiring, semiring.name) + assert hasattr(npsemiring, semiring.name), name diff --git a/test/test_op.py b/test/test_op.py index bf66f0b64..4d31b8e2c 100644 --- a/test/test_op.py +++ b/test/test_op.py @@ -101,9 +101,10 @@ def plus_plus_one(x, y): BinaryOp.register_new('plus_plus_one', plus_plus_one) 
Monoid.register_new('plus_plus_one', binary.plus_plus_one, -1) assert hasattr(monoid, 'plus_plus_one') + # No boolean for monoids yet assert monoid.plus_plus_one.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', - 'FP32', 'FP64', 'BOOL'} + 'FP32', 'FP64'} v1 = Vector.from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) v2 = Vector.from_values([0, 2, 3], [2, 3, 7], dtype=dtypes.INT32) w = v1.ewise_add(v2, monoid.plus_plus_one).new() diff --git a/test/test_vector.py b/test/test_vector.py index 687525f24..5dc2b86ee 100644 --- a/test/test_vector.py +++ b/test/test_vector.py @@ -1,4 +1,5 @@ import pytest +import numpy as np from grblas import Matrix, Vector, Scalar from grblas import unary, binary, monoid, semiring from grblas import dtypes @@ -332,6 +333,9 @@ def test_isclose(v): assert u5.isclose(v) u6 = Vector.from_values([1, 3, 4, 6], [1., 1 + 1e-4, 1.99999, 0.]) assert u6.isclose(v, rel_tol=1e-3) + # isclose should consider `inf == inf` + u7 = Vector.from_values([1, 3], [-np.inf, np.inf]) + assert u7.isclose(u7, rel_tol=1e-8) def test_binary_op(v):