diff --git a/.travis.yml b/.travis.yml index 884e533f3..bac79b755 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ install: - pip install -e . script: - - pytest + - pytest --runslow notifications: email: false diff --git a/grblas/__init__.py b/grblas/__init__.py index 29e337d60..4c61fa7a6 100644 --- a/grblas/__init__.py +++ b/grblas/__init__.py @@ -3,7 +3,7 @@ _init_params = None _SPECIAL_ATTRS = ["lib", "ffi", "Matrix", "Vector", "Scalar", - "base", "exceptions", "matrix", "ops", "scalar", "vector" + "base", "exceptions", "matrix", "ops", "scalar", "vector", "unary", "binary", "monoid", "semiring"] @@ -76,3 +76,8 @@ def _init(backend, blocking, automatic=False): ops.BinaryOp._initialize() ops.Monoid._initialize() ops.Semiring._initialize() + + from .unary import numpy # noqa + from .binary import numpy # noqa + from .monoid import numpy # noqa + from .semiring import numpy # noqa diff --git a/grblas/binary.py b/grblas/binary/__init__.py similarity index 100% rename from grblas/binary.py rename to grblas/binary/__init__.py diff --git a/grblas/binary/numpy.py b/grblas/binary/numpy.py new file mode 100644 index 000000000..32cd9d88b --- /dev/null +++ b/grblas/binary/numpy.py @@ -0,0 +1,70 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numpy here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" +import numpy as np +from .. import ops + +_binary_names = { + # Math operations + 'add', + 'subtract', + 'multiply', + 'divide', + 'logaddexp', + 'logaddexp2', + 'true_divide', + 'floor_divide', + 'power', + 'remainder', + 'mod', + 'fmod', + 'gcd', + 'lcm', + + # Trigonometric functions + 'arctan2', + 'hypot', + + # Bit-twiddling functions + 'bitwise_and', + 'bitwise_or', + 'bitwise_xor', + 'left_shift', + 'right_shift', + + # Comparison functions + 'greater', + 'greater_equal', + 'less', + 'less_equal', + 'not_equal', + 'equal', + 'logical_and', + 'logical_or', + 'logical_xor', + 'maximum', + 'minimum', + 'fmax', + 'fmin', + + # Floating functions + 'copysign', + 'nextafter', + 'ldexp', +} + + +def __dir__(): + return list(_binary_names) + + +def __getattr__(name): + if name not in _binary_names: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + numpy_func = getattr(np, name) + ops.BinaryOp.register_new(f'numpy.{name}', lambda x, y: numpy_func(x, y)) + return globals()[name] diff --git a/grblas/dtypes.py b/grblas/dtypes.py index c579b3e73..db2e3d392 100644 --- a/grblas/dtypes.py +++ b/grblas/dtypes.py @@ -1,6 +1,7 @@ import re -from . import lib +import numpy as np import numba +from . import lib class DataType: @@ -15,9 +16,6 @@ def __init__(self, name, gb_type, c_type, numba_type): def __repr__(self): return self.name - def __hash__(self): - return hash((self.name, self.c_type)) - def __eq__(self, other): if isinstance(other, DataType): return self.gb_type == other.gb_type @@ -54,28 +52,34 @@ def from_pytype(cls, pytype): # Used for testing user-defined functions _sample_values = { - BOOL: True, - INT8: -3, - UINT8: 3, - INT16: -3, - UINT16: 3, - INT32: -3, - UINT32: 3, - INT64: -3, - UINT64: 3, - FP32: 3.14, - FP64: 3.14 + INT8.name: np.int8(1), + UINT8.name: np.uint8(1), + INT16.name: np.int16(1), + UINT16.name: np.uint16(1), + INT32.name: np.int32(1), + UINT32.name: np.uint32(1), + INT64.name: np.int64(1), + UINT64.name: np.uint64(1), + FP32.name: np.float32(0.5), + FP64.name: np.float64(0.5), + BOOL.name: np.bool_(True), } # Create register to easily lookup types by name, gb_type, or c_type _registry = {} -for x in _sample_values: - _registry[x.name] = x - _registry[x.gb_type] = x - _registry[x.c_type] = x - _registry[x.numba_type] = x - _registry[x.numba_type.name] = x -del x +for dtype in [BOOL, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FP32, FP64]: + _registry[dtype.name] = dtype + _registry[dtype.gb_type] = dtype + _registry[dtype.c_type] = dtype + _registry[dtype.numba_type] = dtype + _registry[dtype.numba_type.name] = dtype + val = _sample_values[dtype.name] + _registry[val.dtype] = dtype + _registry[val.dtype.name] = dtype +# Upcast numpy float16 to float32 +_registry[np.dtype(np.float16)] = FP32 +_registry['float16'] = FP32 + # Add some common Python types as lookup keys _registry[int] = DataType.from_pytype(int) _registry[float] = DataType.from_pytype(float) @@ -92,6 +96,10 @@ def lookup(key): if hasattr(key, 'name'): return _registry[key.name] else: + try: + return lookup(np.dtype(key)) + except Exception: + pass raise diff --git a/grblas/matrix.py b/grblas/matrix.py index 270b81e7e..0b853202f 100644 --- a/grblas/matrix.py +++ b/grblas/matrix.py @@ -1,9 +1,9 @@ from functools import partial from .base import lib, ffi, GbContainer, GbDelayed -from .vector import Vector +from .vector import Vector, _generate_isclose from .scalar import Scalar from .ops import BinaryOp, find_opclass, find_return_type, reify_op -from . import dtypes, unary, binary, monoid, semiring +from . import dtypes, binary, monoid, semiring from .exceptions import check_status, is_error, NoValue @@ -57,7 +57,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): """ Check for approximate equality (including same size and empty values) If `check_dtype` is True, also checks that dtypes match - Closeness check is equivalent to `abs(a-b) <= max(rtol * max(abs(a), abs(b)), atol)` + Closeness check is equivalent to `abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)` """ if not isinstance(other, Matrix): return False @@ -69,28 +69,15 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): return False if self.nvals != other.nvals: return False - if check_dtype: - common_dtype = self.dtype - else: - common_dtype = dtypes.unify(self.dtype, other.dtype) - matches = Matrix.new(bool, self.nrows, self.ncols) - tmp1 = self.apply(unary.abs).new(dtype=common_dtype) - tmp2 = other.apply(unary.abs).new(dtype=common_dtype) - tmp1 << tmp1.ewise_mult(tmp2, monoid.max) + isclose = _generate_isclose(rel_tol, abs_tol) + matches = self.ewise_mult(other, isclose).new(dtype=bool) # ewise_mult performs intersection, so nvals will indicate mismatched empty values - if tmp1.nvals != self.nvals: + if matches.nvals != self.nvals: return False - tmp1[:, :](mask=tmp1.S, accum=binary.times) << rel_tol - tmp1[:, :](mask=tmp1.S, accum=binary.max) << abs_tol - tmp2 << self.ewise_mult(other, binary.minus) - tmp2 << tmp2.apply(unary.abs) - matches << tmp2.ewise_mult(tmp1, binary.le[common_dtype]) # Check if all results are True - result = Scalar.new(bool) - result << matches.reduce_scalar(monoid.land) - return result.value + return matches.reduce_scalar(monoid.land).value def __len__(self): return self.nvals diff --git a/grblas/monoid.py b/grblas/monoid/__init__.py similarity index 100% rename from grblas/monoid.py rename to grblas/monoid/__init__.py diff --git a/grblas/monoid/numpy.py b/grblas/monoid/numpy.py new file mode 100644 index 000000000..6d0d1196b --- /dev/null +++ b/grblas/monoid/numpy.py @@ -0,0 +1,100 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numpy here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" +import numpy as np +from .. import ops, binary + +_float_dtypes = {'FP32', 'FP64'} +_int_dtypes = {'INT8', 'UINT8', 'INT16', 'UINT16', 'INT32', 'UINT32', 'INT64', 'UINT64'} +_bool_int_dtypes = _int_dtypes | {'BOOL'} + +_monoid_identities = { + # Math operations + 'add': 0, + 'multiply': 1, + 'logaddexp': dict.fromkeys(_float_dtypes, -np.inf), + 'logaddexp2': dict.fromkeys(_float_dtypes, -np.inf), + 'gcd': dict.fromkeys(_int_dtypes, 0), + + # Trigonometric functions + 'hypot': dict.fromkeys(_float_dtypes, 0.), + + # Bit-twiddling functions + 'bitwise_and': {dtype: True if dtype == 'BOOL' else -1 for dtype in _bool_int_dtypes}, + 'bitwise_or': dict.fromkeys(_bool_int_dtypes, 0), + 'bitwise_xor': dict.fromkeys(_bool_int_dtypes, 0), + + # Comparison functions + # 'equal': {'BOOL': True}, # Not yet supported + # 'logical_and': {'BOOL': True}, # Not yet supported + # 'logical_or': {'BOOL': True}, # Not yet supported + # 'logical_xor': {'BOOL': False}, # Not yet supported + 'maximum': { + 'BOOL': False, + 'INT8': np.iinfo(np.int8).min, + 'UINT8': 0, + 'INT16': np.iinfo(np.int16).min, + 'UINT16': 0, + 'INT32': np.iinfo(np.int32).min, + 'UINT32': 0, + 'INT64': np.iinfo(np.int64).min, + 'UINT64': 0, + 'FP32': -np.inf, + 'FP64': -np.inf, + }, + 'minimum': { + 'BOOL': True, + 'INT8': np.iinfo(np.int8).max, + 'UINT8': np.iinfo(np.uint8).max, + 'INT16': np.iinfo(np.int16).max, + 'UINT16': np.iinfo(np.uint16).max, + 'INT32': np.iinfo(np.int32).max, + 'UINT32': np.iinfo(np.uint32).max, + 'INT64': np.iinfo(np.int64).max, + 'UINT64': np.iinfo(np.uint64).max, + 'FP32': np.inf, + 'FP64': np.inf, + }, + 'fmax': { + 'BOOL': False, + 'INT8': np.iinfo(np.int8).min, + 'UINT8': 0, + 'INT16': np.iinfo(np.int8).min, + 'UINT16': 0, + 'INT32': np.iinfo(np.int8).min, + 'UINT32': 0, + 'INT64': np.iinfo(np.int8).min, + 'UINT64': 0, + 'FP32': -np.inf, # or np.nan? + 'FP64': -np.inf, # or np.nan? + }, + 'fmin': { + 'BOOL': True, + 'INT8': np.iinfo(np.int8).max, + 'UINT8': np.iinfo(np.uint8).max, + 'INT16': np.iinfo(np.int16).max, + 'UINT16': np.iinfo(np.uint16).max, + 'INT32': np.iinfo(np.int32).max, + 'UINT32': np.iinfo(np.uint32).max, + 'INT64': np.iinfo(np.int64).max, + 'UINT64': np.iinfo(np.uint64).max, + 'FP32': np.inf, # or np.nan? + 'FP64': np.inf, # or np.nan? + }, +} + + +def __dir__(): + return list(_monoid_identities) + + +def __getattr__(name): + if name not in _monoid_identities: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + func = getattr(binary.numpy, name) + ops.Monoid.register_new(f'numpy.{name}', func, _monoid_identities[name]) + return globals()[name] diff --git a/grblas/ops.py b/grblas/ops.py index 7bfb61602..680870fe8 100644 --- a/grblas/ops.py +++ b/grblas/ops.py @@ -1,5 +1,8 @@ import re +import types +import numpy as np import numba +from collections.abc import Mapping from types import FunctionType from . import lib, ffi, dtypes, unary, binary, monoid, semiring from .exceptions import GrblasException @@ -65,7 +68,7 @@ def _remove_nesting(cls, funcname): setattr(module, folder, OpPath(module, folder)) module = getattr(module, folder) modname = f'{modname}.{folder}' - if type(module) is not OpPath: + if not isinstance(module, (OpPath, types.ModuleType)): raise AttributeError(f'{modname} is already defined. Cannot use as a nested path.') return module, funcname @@ -127,30 +130,44 @@ class UnaryOp(OpBase): all_known_instances = set() @classmethod - def register_new(cls, name, func): + def _build(cls, name, func): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') - module, funcname = cls._remove_nesting(name) + if name is None: + name = getattr(func, '__name__', '') success = False new_type_obj = cls(name) + return_types = {} + nt = numba.types for type_, sample_val in dtypes._sample_values.items(): + type_ = dtypes.lookup(type_) # Check if func can handle this data type try: - ret = func(sample_val) - if type(ret) is bool: - ret_type = dtypes.BOOL - elif type_ == 'BOOL': - # type_ == bool, but return type != bool; invalid - continue - else: + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + ret = func(sample_val) + ret_type = dtypes.lookup(type(ret)) + if ret_type != type_ and ( + 'INT' in ret_type.name and 'INT' in type_.name + or 'FP' in ret_type.name and 'FP' in type_.name + or type_ == 'UINT64' and ret_type == 'FP64' and return_types.get('INT64') == 'INT64' + ): + # Downcast `ret_type` to `type_`. This is probably what users want most of the time, + # but we can't make a perfect rule. There should be a way for users to be explicit. ret_type = type_ + elif type_ == 'BOOL' and ret_type == 'INT64' and return_types.get('INT8') == 'INT8': + ret_type = dtypes.INT8 + + # Numba has a bug and is unable to handle BOOL correctly right now + # See: https://github.com/numba/numba/issues/5395 + # We're relying on coercion behaving correctly here + input_type = dtypes.INT8 if type_ == 'BOOL' else type_ + return_type = dtypes.INT8 if ret_type == 'BOOL' else ret_type - nt = numba.types # JIT the func so it can be used from a cfunc unary_udf = numba.njit(func) # Build wrapper because GraphBLAS wants pointers and void return - wrapper_sig = nt.void(nt.CPointer(ret_type.numba_type), - nt.CPointer(type_.numba_type)) + wrapper_sig = nt.void(nt.CPointer(return_type.numba_type), + nt.CPointer(input_type.numba_type)) @numba.cfunc(wrapper_sig, nopython=True) def unary_wrapper(z, x): @@ -159,18 +176,29 @@ def unary_wrapper(z, x): new_unary = ffi.new('GrB_UnaryOp*') lib.GrB_UnaryOp_new(new_unary, unary_wrapper.cffi, - ret_type.gb_type, type_.gb_type) + return_type.gb_type, input_type.gb_type) new_type_obj[type_.name] = new_unary[0] _return_type[new_unary[0]] = ret_type.name cls.all_known_instances.add(new_unary[0]) success = True + return_types[type_.name] = ret_type.name except Exception: continue if success: - setattr(module, funcname, new_type_obj) + return new_type_obj else: raise UdfParseError('Unable to parse function using Numba') + @classmethod + def register_anonymous(cls, func, name=None): + return cls._build(name, func) + + @classmethod + def register_new(cls, name, func): + module, funcname = cls._remove_nesting(name) + unary_op = cls._build(name, func) + setattr(module, funcname, unary_op) + class BinaryOp(OpBase): _module = binary @@ -179,9 +207,15 @@ class BinaryOp(OpBase): 'trim_from_front': 4, 'num_underscores': 1, 're_exprs': [ - re.compile('^GrB_(FIRST|SECOND|MIN|MAX|PLUS|MINUS|TIMES|DIV)_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GrB_(FIRST|SECOND|MIN|MAX|PLUS|MINUS|TIMES|DIV)' + '_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), re.compile('^GrB_(LOR|LAND|LXOR)$'), - re.compile('^GxB_(RMINUS|RDIV|PAIR|ANY|ISEQ|ISNE|ISGT|ISLT|ISLE|ISGE)_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GxB_(RMINUS|RDIV|PAIR|ANY|ISEQ|ISNE|ISGT|ISLT|ISLE|ISGE)' + '_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), ], 're_exprs_return_bool': [ re.compile('^GrB_(EQ|NE|GT|LT|GE|LE)_(BOOL|INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), @@ -191,31 +225,45 @@ class BinaryOp(OpBase): all_known_instances = set() @classmethod - def register_new(cls, name, func): + def _build(cls, name, func): if type(func) is not FunctionType: raise TypeError(f'udf must be a function, not {type(func)}') - module, funcname = cls._remove_nesting(name) + if name is None: + name = getattr(func, '__name__', '') success = False new_type_obj = cls(name) + return_types = {} + nt = numba.types for type_, sample_val in dtypes._sample_values.items(): + type_ = dtypes.lookup(type_) # Check if func can handle this data type try: - ret = func(sample_val, sample_val) - if type(ret) is bool: - ret_type = dtypes.BOOL - elif type_ == 'BOOL': - # type_ == bool, but return type != bool; invalid - continue - else: + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + ret = func(sample_val, sample_val) + ret_type = dtypes.lookup(type(ret)) + if ret_type != type_ and ( + 'INT' in ret_type.name and 'INT' in type_.name + or 'FP' in ret_type.name and 'FP' in type_.name + or type_ == 'UINT64' and ret_type == 'FP64' and return_types.get('INT64') == 'INT64' + ): + # Downcast `ret_type` to `type_`. This is probably what users want most of the time, + # but we can't make a perfect rule. There should be a way for users to be explicit. ret_type = type_ + elif type_ == 'BOOL' and ret_type == 'INT64' and return_types.get('INT8') == 'INT8': + ret_type = dtypes.INT8 + + # Numba has a bug and is unable to handle BOOL correctly right now + # See: https://github.com/numba/numba/issues/5395 + # We're relying on coercion behaving correctly here + input_type = dtypes.INT8 if type_ == 'BOOL' else type_ + return_type = dtypes.INT8 if ret_type == 'BOOL' else ret_type - nt = numba.types # JIT the func so it can be used from a cfunc binary_udf = numba.njit(func) # Build wrapper because GraphBLAS wants pointers and void return - wrapper_sig = nt.void(nt.CPointer(ret_type.numba_type), - nt.CPointer(type_.numba_type), - nt.CPointer(type_.numba_type)) + wrapper_sig = nt.void(nt.CPointer(return_type.numba_type), + nt.CPointer(input_type.numba_type), + nt.CPointer(input_type.numba_type)) @numba.cfunc(wrapper_sig, nopython=True) def binary_wrapper(z, x, y): @@ -224,18 +272,29 @@ def binary_wrapper(z, x, y): new_binary = ffi.new('GrB_BinaryOp*') lib.GrB_BinaryOp_new(new_binary, binary_wrapper.cffi, - ret_type.gb_type, type_.gb_type, type_.gb_type) + return_type.gb_type, input_type.gb_type, input_type.gb_type) new_type_obj[type_.name] = new_binary[0] _return_type[new_binary[0]] = ret_type.name cls.all_known_instances.add(new_binary[0]) success = True + return_types[type_.name] = ret_type.name except Exception: continue if success: - setattr(module, funcname, new_type_obj) + return new_type_obj else: raise UdfParseError('Unable to parse function using Numba') + @classmethod + def register_anonymous(cls, func, name=None): + return cls._build(name, func) + + @classmethod + def register_new(cls, name, func): + module, funcname = cls._remove_nesting(name) + binary_op = cls._build(name, func) + setattr(module, funcname, binary_op) + @classmethod def _initialize(cls): super()._initialize() @@ -264,29 +323,49 @@ class Monoid(OpBase): 'trim_from_back': 7, 'num_underscores': 1, 're_exprs': [ - re.compile('^GxB_(MAX|MIN|PLUS|TIMES|ANY)_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)_MONOID$'), + re.compile( + '^GxB_(MAX|MIN|PLUS|TIMES|ANY)' + '_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)_MONOID$' + ), re.compile('^GxB_(EQ|LAND|LOR|LXOR|ANY)_BOOL_MONOID$'), ], } all_known_instances = set() @classmethod - def register_new(cls, name, binaryop, zero): + def _build(cls, name, binaryop, identity): if type(binaryop) is not BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') - module, funcname = cls._remove_nesting(name) + if name is None: + name = binaryop.name new_type_obj = cls(name) - for type_ in binaryop.types: + if not isinstance(identity, Mapping): + identities = dict.fromkeys(binaryop.types, identity) + else: + identities = identity + for type_, identity in identities.items(): + if type_ == 'BOOL': # Not yet supported + continue type_ = dtypes.lookup(type_) new_monoid = ffi.new('GrB_Monoid*') func = getattr(lib, f'GrB_Monoid_new_{type_.name}') - zcast = ffi.cast(type_.c_type, zero) + zcast = ffi.cast(type_.c_type, identity) func(new_monoid, binaryop[type_], zcast) new_type_obj[type_.name] = new_monoid[0] ret_type = find_return_type(binaryop[type_]) _return_type[new_monoid[0]] = ret_type cls.all_known_instances.add(new_monoid[0]) - setattr(module, funcname, new_type_obj) + return new_type_obj + + @classmethod + def register_anonymous(cls, binaryop, identity, name=None): + return cls._build(name, binaryop, identity) + + @classmethod + def register_new(cls, name, binaryop, identity): + module, funcname = cls._remove_nesting(name) + monoid = cls._build(name, binaryop, identity) + setattr(module, funcname, monoid) class Semiring(OpBase): @@ -296,32 +375,56 @@ class Semiring(OpBase): 'trim_from_front': 4, 'num_underscores': 2, 're_exprs': [ - re.compile('^GxB_(MIN|MAX|PLUS|TIMES|ANY)_(FIRST|SECOND|PAIR|MIN|MAX|PLUS|MINUS|RMINUS|TIMES|DIV|RDIV|ISEQ|ISNE|ISGT|ISLT|ISGE|ISLE|LOR|LAND|LXOR)_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GxB_(MIN|MAX|PLUS|TIMES|ANY)' + '_(FIRST|SECOND|PAIR|MIN|MAX|PLUS|MINUS|RMINUS|TIMES' + '|DIV|RDIV|ISEQ|ISNE|ISGT|ISLT|ISGE|ISLE|LOR|LAND|LXOR)' + '_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), re.compile('^GxB_(LOR|LAND|LXOR|EQ|ANY)_(FIRST|SECOND|PAIR|LOR|LAND|LXOR|EQ|GT|LT|GE|LE)_BOOL$'), ], 're_exprs_return_bool': [ - re.compile('^GxB_(LOR|LAND|LXOR|EQ|ANY)_(EQ|NE|GT|LT|GE|LE)_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$'), + re.compile( + '^GxB_(LOR|LAND|LXOR|EQ|ANY)_(EQ|NE|GT|LT|GE|LE)' + '_(INT8|UINT8|INT16|UINT16|INT32|UINT32|INT64|UINT64|FP32|FP64)$' + ), ], } all_known_instances = set() @classmethod - def register_new(cls, name, monoid, binaryop): + def _build(cls, name, monoid, binaryop): if type(monoid) is not Monoid: raise TypeError(f'monoid must be a Monoid, not {type(monoid)}') if type(binaryop) != BinaryOp: raise TypeError(f'binaryop must be a BinaryOp, not {type(binaryop)}') - module, funcname = cls._remove_nesting(name) + if name is None: + name = f'{monoid.name}_{binaryop.name}' new_type_obj = cls(name) - for type_ in binaryop.types & monoid.types: - type_ = dtypes.lookup(type_) + for binary_in, binary_func in binaryop._specific_types.items(): + binary_out = find_return_type(binary_func) + # Unfortunately, we can't have user-defined monoids over bools yet + # because numba can't compile correctly. + if binary_out not in monoid.types or binary_out == 'BOOL': + continue + binary_out = dtypes.lookup(binary_out) new_semiring = ffi.new('GrB_Semiring*') - lib.GrB_Semiring_new(new_semiring, monoid[type_], binaryop[type_]) - new_type_obj[type_.name] = new_semiring[0] - ret_type = find_return_type(monoid[type_]) + lib.GrB_Semiring_new(new_semiring, monoid[binary_out], binary_func) + new_type_obj[binary_in] = new_semiring[0] + ret_type = find_return_type(monoid[binary_out]) _return_type[new_semiring[0]] = ret_type cls.all_known_instances.add(new_semiring[0]) - setattr(module, funcname, new_type_obj) + return new_type_obj + + @classmethod + def register_anonymous(cls, monoid, binaryop, name=None): + return cls._build(name, monoid, binaryop) + + @classmethod + def register_new(cls, name, monoid, binaryop): + module, funcname = cls._remove_nesting(name) + semiring = cls._build(name, monoid, binaryop) + setattr(module, funcname, semiring) def find_opclass(gb_op): diff --git a/grblas/semiring.py b/grblas/semiring/__init__.py similarity index 100% rename from grblas/semiring.py rename to grblas/semiring/__init__.py diff --git a/grblas/semiring/numpy.py b/grblas/semiring/numpy.py new file mode 100644 index 000000000..959752b32 --- /dev/null +++ b/grblas/semiring/numpy.py @@ -0,0 +1,102 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numpy here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" +import itertools +from .. import ops, binary, monoid +from ..binary.numpy import _binary_names +from ..monoid.numpy import _monoid_identities + +_semiring_names = { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product(_monoid_identities, _binary_names) +} + +# Remove incompatible combinations +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'equal', 'hypot', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_or', 'logical_xor'}, + {'gcd', 'lcm', 'left_shift', 'right_shift'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'bitwise_and', 'bitwise_or', 'bitwise_xor', 'equal', 'gcd', 'logical_and', 'logical_or', 'logical_xor'}, + {'arctan2', 'copysign', 'divide', 'hypot', 'ldexp', 'logaddexp2', 'logaddexp', 'nextafter', 'true_divide'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'hypot', 'logaddexp', 'logaddexp2'}, + {'bitwise_and', 'bitwise_or', 'bitwise_xor'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'equal', 'logical_and', 'logical_or', 'logical_xor'}, + {'floor_divide', 'fmod', 'mod', 'power', 'remainder', 'subtract'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'gcd', 'hypot', 'logaddexp', 'logaddexp2'}, + {'equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'} + ) +} +# XXX: we can't handle any semirings with bool at the moment +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'add', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'fmax', 'fmin', 'maximum', 'minimum', 'multiply'}, + {'equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'} + ) +} +# _ +_semiring_names -= { + f'{monoid_name}_{binary_name}' + for monoid_name, binary_name in itertools.product( + {'equal', 'logical_and', 'logical_or', 'logical_xor'}, + {'add', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'equal', 'fmax', 'fmin', + 'greater', 'greater_equal', 'less', 'less_equal', 'logical_and', 'logical_or', + 'logical_xor', 'maximum', 'minimum', 'multiply', 'not_equal'} + ) +} + + +def __dir__(): + return list(_semiring_names) + + +def __getattr__(name): + if name not in _semiring_names: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + words = name.split('_') + for i in range(1, len(words)): + monoid_name = '_'.join(words[:i]) + if not hasattr(monoid.numpy, monoid_name): + continue + binary_name = '_'.join(words[i:]) + if hasattr(binary.numpy, binary_name): + break + ops.Semiring.register_new( + f'numpy.{name}', + getattr(monoid.numpy, monoid_name), + getattr(binary.numpy, binary_name), + ) + return globals()[name] diff --git a/grblas/unary.py b/grblas/unary/__init__.py similarity index 100% rename from grblas/unary.py rename to grblas/unary/__init__.py diff --git a/grblas/unary/numpy.py b/grblas/unary/numpy.py new file mode 100644 index 000000000..7386e2882 --- /dev/null +++ b/grblas/unary/numpy.py @@ -0,0 +1,82 @@ +""" Create UDFs of numpy functions supported by numba. + +See list of numpy ufuncs supported by numpy here: + +https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#math-operations + +""" +import numpy as np +from .. import ops, unary + +_unary_names = { + # Math operations + 'negative', + 'abs', + 'absolute', + 'fabs', + 'rint', + 'sign', + 'conj', + 'exp', + 'exp2', + 'log', + 'log2', + 'log10', + 'expm1', + 'log1p', + 'sqrt', + 'square', + 'reciprocal', + 'conjugate', + + # Trigonometric functions + 'sin', + 'cos', + 'tan', + 'arcsin', + 'arccos', + 'arctan', + 'sinh', + 'cosh', + 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'deg2rad', + 'rad2deg', + 'degrees', + 'radians', + + # Bit-twiddling functions + 'bitwise_not', + 'invert', + + # Comparison functions + 'logical_not', + + # Floating functions + 'isfinite', + 'isinf', + 'isnan', + 'signbit', + 'floor', + 'ceil', + 'trunc', + 'spacing', +} + + +def __dir__(): + return list(_unary_names) + + +def __getattr__(name): + if name not in _unary_names: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + numpy_func = getattr(np, name) + ops.UnaryOp.register_new(f'numpy.{name}', lambda x: numpy_func(x)) + rv = globals()[name] + if name in {'invert', 'bitwise_not'}: + # numba has difficulty compiling with bool dtypes, so fix our hack + rv._specific_types['BOOL'] = unary.numpy.logical_not._specific_types['BOOL'] + return rv diff --git a/grblas/vector.py b/grblas/vector.py index 9919df8d4..031b7843e 100644 --- a/grblas/vector.py +++ b/grblas/vector.py @@ -1,11 +1,19 @@ -from functools import partial +from functools import lru_cache, partial from .base import lib, ffi, GbContainer, GbDelayed from .scalar import Scalar from .ops import BinaryOp, find_opclass, find_return_type, reify_op -from . import dtypes, unary, binary, monoid, semiring +from . import dtypes, binary, monoid, semiring from .exceptions import check_status, is_error, NoValue +@lru_cache(maxsize=1024) +def _generate_isclose(rel_tol, abs_tol): + # numba will inline the current values of `rel_tol` and `abs_tol` below + def isclose(x, y): + return x == y or abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) + return BinaryOp.register_anonymous(isclose, f'isclose(rel_tol={rel_tol:g}, abs_tol={abs_tol:g})') + + class Vector(GbContainer): """ GraphBLAS Sparse Vector @@ -55,7 +63,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): """ Check for approximate equality (including same size and empty values) If `check_dtype` is True, also checks that dtypes match - Closeness check is equivalent to `abs(a-b) <= max(rtol * max(abs(a), abs(b)), atol)` + Closeness check is equivalent to `abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)` """ if type(other) is not self.__class__: return False @@ -65,29 +73,15 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): return False if self.nvals != other.nvals: return False - if check_dtype: - # dtypes are equivalent, so not need to unify - common_dtype = self.dtype - else: - common_dtype = dtypes.unify(self.dtype, other.dtype) - matches = Vector.new(bool, self.size) - tmp1 = self.apply(unary.abs).new(dtype=common_dtype) - tmp2 = other.apply(unary.abs).new(dtype=common_dtype) - tmp1 << tmp1.ewise_mult(tmp2, monoid.max) + isclose = _generate_isclose(rel_tol, abs_tol) + matches = self.ewise_mult(other, isclose).new(dtype=bool) # ewise_mult performs intersection, so nvals will indicate mismatched empty values - if tmp1.nvals != self.nvals: + if matches.nvals != self.nvals: return False - tmp1[:](mask=tmp1.S, accum=binary.times) << rel_tol - tmp1[:](mask=tmp1.S, accum=binary.max) << abs_tol - tmp2 << self.ewise_mult(other, binary.minus) - tmp2 << tmp2.apply(unary.abs) - matches << tmp2.ewise_mult(tmp1, binary.le[common_dtype]) # Check if all results are True - result = Scalar.new(bool) - result << matches.reduce(monoid.land) - return result.value + return matches.reduce(monoid.land).value def __len__(self): return self.nvals diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 49d34bf77..1206f9230 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -16,7 +16,7 @@ build: requirements: build: - {{ compiler('c') }} - + host: - python - pip @@ -43,4 +43,4 @@ about: extra: recipe-maintainers: - - jim22k \ No newline at end of file + - jim22k diff --git a/setup.cfg b/setup.cfg index ea329e4aa..604ff03e6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,8 @@ test=pytest [flake8] -max-line-length = 120 \ No newline at end of file +max-line-length = 120 + +[tool:pytest] +markers: + slow: Skipped unless --runslow passed diff --git a/test/conftest.py b/test/conftest.py index 1f8dde0d5..89e937164 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,7 +1,11 @@ +import pytest + + def pytest_addoption(parser): parser.addoption( "--backend", action="store", default="suitesparse", help="name of a backend in grblas.backends" ) + parser.addoption("--runslow", action="store_true", help="run slow tests") def pytest_configure(config): @@ -9,3 +13,8 @@ def pytest_configure(config): import grblas grblas.init(backend) print(f'Running tests with "{backend}" backend') + + +def pytest_runtest_setup(item): + if "slow" in item.keywords and not item.config.getoption("--runslow"): + pytest.skip("need --runslow option to run") diff --git a/test/test_numpyops.py b/test/test_numpyops.py new file mode 100644 index 000000000..5edab54df --- /dev/null +++ b/test/test_numpyops.py @@ -0,0 +1,195 @@ +# These tests are very slow, since they force creation of all numpy unary, binary, monoid, and semiring objects +import pytest +import numpy as np +import itertools +import grblas +import grblas.unary.numpy as npunary +import grblas.binary.numpy as npbinary +import grblas.monoid.numpy as npmonoid +import grblas.semiring.numpy as npsemiring + + +@pytest.mark.slow +def test_npunary(): + L = list(range(5)) + data = [ + [grblas.Vector.from_values([0, 1], [True, False]), np.array([True, False])], + [grblas.Vector.from_values(L, L), np.array(L, dtype=int)], + [grblas.Vector.from_values(L, L, dtype='float64'), np.array(L, dtype=np.float64)], + ] + blacklist = {} + isclose = grblas.vector._generate_isclose(1e-7, 0) + for gb_input, np_input in data: + for unary_name in sorted(npunary._unary_names): + op = getattr(npunary, unary_name) + if gb_input.dtype.name not in op.types or unary_name in blacklist.get(gb_input.dtype.name, ()): + continue + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + gb_result = gb_input.apply(op).new() + if gb_input.dtype == 'BOOL' and gb_result.dtype == 'FP32': + np_result = getattr(np, unary_name)(np_input, dtype='float32') + compare_op = isclose + else: + np_result = getattr(np, unary_name)(np_input) + compare_op = npbinary.equal + np_result = grblas.Vector.from_values(list(range(np_input.size)), list(np_result), dtype=gb_result.dtype) + assert gb_result.nvals == np_result.size + match = gb_result.ewise_mult(np_result, compare_op).new() + match(accum=grblas.binary.lor) << gb_result.apply(npunary.isnan) + compare = match.reduce(grblas.monoid.land).value + if not compare: + print(unary_name, gb_input.dtype) + print(gb_result.show()) + print(np_result.show()) + assert compare + + +@pytest.mark.slow +def test_npbinary(): + values1 = [0, 0, 1, 1, 2, 5] + values2 = [0, 1, 0, 1, 3, 8] + index = list(range(len(values1))) + data = [ + [ + [ + grblas.Vector.from_values(index, values1), + grblas.Vector.from_values(index, values2), + ], + [ + np.array(values1, dtype=int), + np.array(values2, dtype=int), + ], + ], + [ + [ + grblas.Vector.from_values(index, values1, dtype='float64'), + grblas.Vector.from_values(index, values2, dtype='float64'), + ], + [ + np.array(values1, dtype=np.float64), + np.array(values2, dtype=np.float64), + ], + ], + [ + [ + grblas.Vector.from_values([0, 1, 2, 3], [True, False, True, False]), + grblas.Vector.from_values([0, 1, 2, 3], [True, True, False, False]), + ], + [ + np.array([True, False, True, False]), + np.array([True, True, False, False]), + ], + ], + ] + blacklist = { + 'FP64': { + 'floor_divide', # numba/numpy difference for 1.0 / 0.0 + }, + } + isclose = grblas.vector._generate_isclose(1e-7, 0) + for (gb_left, gb_right), (np_left, np_right) in data: + for binary_name in sorted(npbinary._binary_names): + op = getattr(npbinary, binary_name) + if gb_left.dtype.name not in op.types or binary_name in blacklist.get(gb_left.dtype.name, ()): + continue + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + gb_result = gb_left.ewise_mult(gb_right, op).new() + if gb_left.dtype == 'BOOL' and gb_result.dtype == 'FP32': + np_result = getattr(np, binary_name)(np_left, np_right, dtype='float32') + compare_op = isclose + else: + np_result = getattr(np, binary_name)(np_left, np_right) + compare_op = npbinary.equal + np_result = grblas.Vector.from_values(list(range(np_left.size)), list(np_result), dtype=gb_result.dtype) + assert gb_result.nvals == np_result.size + match = gb_result.ewise_mult(np_result, compare_op).new() + match(accum=grblas.binary.lor) << gb_result.apply(npunary.isnan) + compare = match.reduce(grblas.monoid.land).value + if not compare: + print(binary_name, gb_left.dtype) + print(gb_result.show()) + print(np_result.show()) + assert compare + + +@pytest.mark.slow +def test_npmonoid(): + values1 = [0, 0, 1, 1, 2, 5] + values2 = [0, 1, 0, 1, 3, 8] + index = list(range(len(values1))) + data = [ + [ + [ + grblas.Vector.from_values(index, values1), + grblas.Vector.from_values(index, values2), + ], + [ + np.array(values1, dtype=int), + np.array(values2, dtype=int), + ], + ], + [ + [ + grblas.Vector.from_values(index, values1, dtype='float64'), + grblas.Vector.from_values(index, values2, dtype='float64'), + ], + [ + np.array(values1, dtype=np.float64), + np.array(values2, dtype=np.float64), + ], + ], + [ + [ + grblas.Vector.from_values([0, 1, 2, 3], [True, False, True, False]), + grblas.Vector.from_values([0, 1, 2, 3], [True, True, False, False]), + ], + [ + np.array([True, False, True, False]), + np.array([True, True, False, False]), + ], + ], + ] + blacklist = {} + for (gb_left, gb_right), (np_left, np_right) in data: + for binary_name in sorted(npmonoid._monoid_identities): + op = getattr(npmonoid, binary_name) + assert len(op.types) > 0, op.name + if gb_left.dtype.name not in op.types or binary_name in blacklist.get(gb_left.dtype.name, ()): + continue + with np.errstate(divide='ignore', over='ignore', under='ignore', invalid='ignore'): + gb_result = gb_left.ewise_mult(gb_right, op).new() + np_result = getattr(np, binary_name)(np_left, np_right) + np_result = grblas.Vector.from_values(list(range(np_left.size)), list(np_result), dtype=gb_result.dtype) + assert gb_result.nvals == np_result.size + match = gb_result.ewise_mult(np_result, npbinary.equal).new() + match(accum=grblas.binary.lor) << gb_result.apply(npunary.isnan) + compare = match.reduce(grblas.monoid.land).value + if not compare: + print(binary_name, gb_left.dtype) + print(gb_result.show()) + print(np_result.show()) + assert compare + + gb_result = gb_left.reduce(op).new() + np_result = getattr(np, binary_name).reduce(np_left) + assert gb_result.value == np_result + + gb_result = gb_right.reduce(op).new() + np_result = getattr(np, binary_name).reduce(np_right) + assert gb_result.value == np_result + + +@pytest.mark.slow +def test_npsemiring(): + for monoid_name, binary_name in itertools.product( + sorted(npmonoid._monoid_identities), + sorted(npbinary._binary_names) + ): + monoid = getattr(npmonoid, monoid_name) + binary = getattr(npbinary, binary_name) + name = monoid.name.split(".")[-1] + "_" + binary.name.split(".")[-1] + semiring = grblas.ops.Semiring.register_anonymous(monoid, binary, name) + if len(semiring.types) == 0: + assert not hasattr(npsemiring, semiring.name), name + else: + assert hasattr(npsemiring, semiring.name), name diff --git a/test/test_op.py b/test/test_op.py index 4a46f22a2..4d31b8e2c 100644 --- a/test/test_op.py +++ b/test/test_op.py @@ -58,7 +58,7 @@ def plus_one(x): assert hasattr(unary, 'plus_one') assert unary.plus_one.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', - 'FP32', 'FP64'} + 'FP32', 'FP64', 'BOOL'} v = Vector.from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) v << v.apply(unary.plus_one) result = Vector.from_values([0, 1, 3], [2, 3, -3], dtype=dtypes.INT32) @@ -66,18 +66,18 @@ def plus_one(x): def test_unaryop_udf_bool_result(): - pytest.xfail('not sure why numba has trouble compiling this') - # def is_positive(x): - # return x > 0 - # unary.register_new('is_positive', is_positive) - # assert hasattr(UnaryOp, 'is_positive') - # assert unary.is_positive.types == {'INT8', 'INT16', 'INT32', 'INT64', - # 'UINT8', 'UINT16', 'UINT32', 'UINT64', - # 'FP32', 'FP64'} - # v = Vector.from_values([0,1,3], [1,2,-4], dtype=dtypes.INT32) - # w = v.apply(unary.is_positive).new() - # result = Vector.from_values([0,1,3], [True,True,False], dtype=dtypes.BOOL) - # assert v == result + # numba has trouble compiling this, but we have a work-around + def is_positive(x): + return x > 0 + UnaryOp.register_new('is_positive', is_positive) + assert hasattr(unary, 'is_positive') + assert unary.is_positive.types == {'INT8', 'INT16', 'INT32', 'INT64', + 'UINT8', 'UINT16', 'UINT32', 'UINT64', + 'FP32', 'FP64', 'BOOL'} + v = Vector.from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) + w = v.apply(unary.is_positive).new() + result = Vector.from_values([0, 1, 3], [True, True, False], dtype=dtypes.BOOL) + assert w.isequal(result) def test_binaryop_udf(): @@ -101,6 +101,7 @@ def plus_plus_one(x, y): BinaryOp.register_new('plus_plus_one', plus_plus_one) Monoid.register_new('plus_plus_one', binary.plus_plus_one, -1) assert hasattr(monoid, 'plus_plus_one') + # No boolean for monoids yet assert monoid.plus_plus_one.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', 'FP32', 'FP64'} @@ -149,7 +150,7 @@ def plus_three(x): assert hasattr(unary.incrementers, 'plus_three') assert unary.incrementers.plus_three.types == {'INT8', 'INT16', 'INT32', 'INT64', 'UINT8', 'UINT16', 'UINT32', 'UINT64', - 'FP32', 'FP64'} + 'FP32', 'FP64', 'BOOL'} v = Vector.from_values([0, 1, 3], [1, 2, -4], dtype=dtypes.INT32) v << v.apply(unary.incrementers.plus_three) result = Vector.from_values([0, 1, 3], [4, 5, -1], dtype=dtypes.INT32) diff --git a/test/test_vector.py b/test/test_vector.py index 687525f24..5dc2b86ee 100644 --- a/test/test_vector.py +++ b/test/test_vector.py @@ -1,4 +1,5 @@ import pytest +import numpy as np from grblas import Matrix, Vector, Scalar from grblas import unary, binary, monoid, semiring from grblas import dtypes @@ -332,6 +333,9 @@ def test_isclose(v): assert u5.isclose(v) u6 = Vector.from_values([1, 3, 4, 6], [1., 1 + 1e-4, 1.99999, 0.]) assert u6.isclose(v, rel_tol=1e-3) + # isclose should consider `inf == inf` + u7 = Vector.from_values([1, 3], [-np.inf, np.inf]) + assert u7.isclose(u7, rel_tol=1e-8) def test_binary_op(v):